/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
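
/* For instance (an illustrative sketch, not taken from the original sources),
   in a scalar loop such as

     sum = 0;
     for (i = 0; i < n; i++)
       {
         b[i] = a[i] + 1;   <-- relevant: it has a vdef (writes memory)
         sum += a[i];       <-- live: its final value is used after the loop
       }
     ... = sum;

   both statements in the body are picked up here, while a statement whose
   result only feeds address computation is not (see
   exist_non_indexing_operands_for_use_p).  */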
static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that its of one of
     the following forms:
     -1- STMT is an assignment of the form 'var = array_ref'
     -2- STMT is an assignment of the form 'array_ref = var'
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
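
/* As an illustration (a sketch only, not from the original sources): in a
   store such as

     a[i_1] = x_2;

   the use of x_2 is a real (non-indexing) operand, so the function above
   returns true for it, while the use of i_1 only feeds the address
   computation and so the function returns false for it.  */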
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
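
/* A sketch of case 3 (illustrative only, not text from the original
   sources): in outer-loop vectorization of a nest such as

     for (i = ...)           <-- outer loop being vectorized
       {
         d = ...;            <-- def in the outer loop
         for (j = ...)       <-- inner loop
           ... = ... d ...;  <-- use in the inner loop (case 3a)
       }

   RELEVANT is adjusted before being propagated to D's defining statement;
   case 3b is the mirror situation of an inner-loop def used by an
   outer-loop statement.  */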
static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example,
   statements that only compute the addresses of vectorized data-refs or
   that implement the loop control do not need to be vectorized themselves:
   loop control and addressing of vectorized data-refs are handled
   differently.

   This pass detects such stmts.  */
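
/* A sketch in the spirit of the example meant above (illustrative only):

     loop:
   1.  T0 = i + j;       <-- used only to address the data-ref
   2.  T1 = a[T0];       <-- the access that actually gets vectorized
   3.  j = j + 1;        <-- loop control

   Stmts 1 and 3 do not need to be vectorized.  */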
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  gimple *phi;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");
              return false;
            }
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");
              return false;
            }
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
            gcc_unreachable ();
          if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
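
/* For example (illustrative only): a single-step widening such as
   short -> int has PWR == 0, while a two-step widening such as
   char -> int has PWR == 1 and needs roughly twice as many
   vec_promote_demote operations, since each extra step doubles the count.  */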
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       enum vect_def_type dt, slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    /* N scalar stores plus extracting the elements.  */
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     scalar_store, stmt_info, 0, vect_body);
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");
        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");
        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
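
/* For example (an illustrative sketch, not part of the original sources):
   vectorizing "x + 3.0f" with a four-element float vector type leads to a
   call like vect_init_vector (stmt, 3.0f, V4SF_type, NULL), which emits
   something along the lines of

     cst_1 = { 3.0e+0, 3.0e+0, 3.0e+0, 3.0e+0 };

   in the loop preheader and returns the SSA name cst_1.  */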
tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);
/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree step;
  if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
  return tree_int_cst_compare (step, size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */
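
/* For example (illustrative only): for a four-element vector type the
   selector built below is { 3, 2, 1, 0 }, i.e. element I of the result is
   taken from element NUNITS - 1 - I of the input.  */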
static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
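
/* For instance (a sketch, not from the original sources), the two loads in

     for (i = 0; i < n; i++)
       {
         x = a[2*i];
         y = a[2*i + 1];
         ...
       }

   form a grouped access with GROUP_SIZE == 2 and no gap; if only the first
   of them were present, the group would have a gap of one element at the
   end.  */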
static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
                           vec_load_store_type vls_type,
                           vect_memory_access_type *memory_access_type)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
                           && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
        {
          /* Try to use consecutive accesses of GROUP_SIZE elements,
             separated by the stride, until we have a complete vector.
             Fall back to scalar accesses if that isn't possible.  */
          if (nunits % group_size == 0)
            *memory_access_type = VMAT_STRIDED_SLP;
          else
            *memory_access_type = VMAT_ELEMENTWISE;
        }
      else
        {
          overrun_p = loop_vinfo && gap != 0;
          if (overrun_p && vls_type != VLS_LOAD)
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Grouped store with gaps requires"
                               " non-consecutive accesses\n");
              return false;
            }
          /* If the access is aligned an overrun is fine.  */
          if (overrun_p
              && aligned_access_p
                   (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
            overrun_p = false;
          if (overrun_p && !can_overrun_p)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
          *memory_access_type = VMAT_CONTIGUOUS;
        }
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
         but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* If the access is aligned an overrun is fine, but only if the
         overrun is not inside an unused vector (if the gap is as large
         or larger than a vector).  */
      if (would_overrun_p
          && gap < nunits
          && aligned_access_p
               (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
        would_overrun_p = false;
      if (!STMT_VINFO_STRIDED_P (stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (stmt) > 0)
        {
          /* First try using LOAD/STORE_LANES.  */
          if (vls_type == VLS_LOAD
              ? vect_load_lanes_supported (vectype, group_size)
              : vect_store_lanes_supported (vectype, group_size))
            {
              *memory_access_type = VMAT_LOAD_STORE_LANES;
              overrun_p = would_overrun_p;
            }

          /* If that fails, try using permuting loads.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_grouped_load_supported (vectype, single_element_p,
                                                 group_size)
                  : vect_grouped_store_supported (vectype, group_size)))
            {
              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
              overrun_p = would_overrun_p;
            }
        }
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
         stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
        {
          gcc_assert (gimple_assign_single_p (next_stmt));
          tree op = gimple_assign_rhs1 (next_stmt);
          gimple *def_stmt;
          enum vect_def_type dt;
          if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "use not simple.\n");
              return false;
            }
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        }
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Data access with gaps requires scalar "
                         "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */
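
/* For instance (illustrative only):

     for (i = n - 1; i >= 0; i--)
       a[i] = b[i] + 1.0f;

   accesses both arrays contiguously but with a negative step; such an
   access can be vectorized by loading or storing whole vectors and
   reversing them (VMAT_CONTIGUOUS_REVERSE) when a reversing permute
   mask is available.  */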
1889 static vect_memory_access_type
1890 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1891 vec_load_store_type vls_type
,
1892 unsigned int ncopies
)
1894 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1895 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1896 dr_alignment_support alignment_support_scheme
;
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1902 "multiple types with negative step.\n");
1903 return VMAT_ELEMENTWISE
;
1906 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1907 if (alignment_support_scheme
!= dr_aligned
1908 && alignment_support_scheme
!= dr_unaligned_supported
)
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1912 "negative step but alignment required.\n");
1913 return VMAT_ELEMENTWISE
;
1916 if (vls_type
== VLS_STORE_INVARIANT
)
1918 if (dump_enabled_p ())
1919 dump_printf_loc (MSG_NOTE
, vect_location
,
1920 "negative step with invariant source;"
1921 " no permute needed.\n");
1922 return VMAT_CONTIGUOUS_DOWN
;
  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
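/* Illustrative example (not from the original source): a consecutive
   access with a negative step, e.g. the hypothetical loop

     for (int i = n - 1; i >= 0; --i)
       a[i] = b[i] + 1.0f;

   walks both arrays downwards one element per iteration.  When the
   target can reverse a vector (perm_mask_for_reverse), such accesses
   become contiguous loads/stores followed by a reversing permute
   (VMAT_CONTIGUOUS_REVERSE); otherwise they fall back to one scalar
   access per element (VMAT_ELEMENTWISE).  */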
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
get_load_store_type (gimple *stmt, tree vectype, bool slp,
		     vec_load_store_type vls_type, unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1954 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1956 *memory_access_type
= VMAT_GATHER_SCATTER
;
1958 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1960 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1961 &gs_info
->offset_dt
,
1962 &gs_info
->offset_vectype
))
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1966 "%s index use not simple.\n",
1967 vls_type
== VLS_LOAD
? "gather" : "scatter");
1971 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1973 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1974 memory_access_type
))
1977 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1980 *memory_access_type
= VMAT_ELEMENTWISE
;
1984 int cmp
= compare_step_with_zero (stmt
);
1986 *memory_access_type
= get_negative_load_store_type
1987 (stmt
, vectype
, vls_type
, ncopies
);
1990 gcc_assert (vls_type
== VLS_LOAD
);
1991 *memory_access_type
= VMAT_INVARIANT
;
1994 *memory_access_type
= VMAT_CONTIGUOUS
;
  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
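/* Illustrative example (not from the original source): the kind of
   conditional access that reaches this function as IFN_MASK_LOAD /
   IFN_MASK_STORE after if-conversion, e.g. the hypothetical loop

     for (int i = 0; i < n; ++i)
       if (c[i])
	 a[i] = b[i];

   where the load of b[i] and the store to a[i] are both executed under
   the mask c[i] != 0, so the vectorized form uses masked vector memory
   operations instead of unconditional ones.  */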
2019 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2020 gimple
**vec_stmt
, slp_tree slp_node
)
2022 tree vec_dest
= NULL
;
2023 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2024 stmt_vec_info prev_stmt_info
;
2025 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2026 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2027 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2028 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2029 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2030 tree rhs_vectype
= NULL_TREE
;
2035 tree dataref_ptr
= NULL_TREE
;
2037 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2041 gather_scatter_info gs_info
;
2042 vec_load_store_type vls_type
;
2045 enum vect_def_type dt
;
2047 if (slp_node
!= NULL
)
2050 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2051 gcc_assert (ncopies
>= 1);
2053 mask
= gimple_call_arg (stmt
, 2);
2055 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2058 /* FORNOW. This restriction should be relaxed. */
2059 if (nested_in_vect_loop
&& ncopies
> 1)
2061 if (dump_enabled_p ())
2062 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2063 "multiple types in nested loop.");
2067 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2070 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2074 if (!STMT_VINFO_DATA_REF (stmt_info
))
2077 elem_type
= TREE_TYPE (vectype
);
2079 if (TREE_CODE (mask
) != SSA_NAME
)
2082 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2086 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2088 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2089 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2092 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2094 tree rhs
= gimple_call_arg (stmt
, 3);
2095 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2097 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2098 vls_type
= VLS_STORE_INVARIANT
;
2100 vls_type
= VLS_STORE
;
2103 vls_type
= VLS_LOAD
;
2105 vect_memory_access_type memory_access_type
;
2106 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2107 &memory_access_type
, &gs_info
))
2110 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2112 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2114 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2115 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2119 "masked gather with integer mask not supported.");
2123 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2127 "unsupported access type for masked %s.\n",
2128 vls_type
== VLS_LOAD
? "load" : "store");
2131 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2132 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2133 TYPE_MODE (mask_vectype
),
2134 vls_type
== VLS_LOAD
)
2136 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2139 if (!vec_stmt
) /* transformation not required. */
2141 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2142 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2143 if (vls_type
== VLS_LOAD
)
2144 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2147 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2148 dt
, NULL
, NULL
, NULL
);
2151 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2155 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2157 tree vec_oprnd0
= NULL_TREE
, op
;
2158 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2159 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2160 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2161 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2162 tree mask_perm_mask
= NULL_TREE
;
2163 edge pe
= loop_preheader_edge (loop
);
2166 enum { NARROW
, NONE
, WIDEN
} modifier
;
2167 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2169 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2170 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2171 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2172 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2173 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2174 scaletype
= TREE_VALUE (arglist
);
2175 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2176 && types_compatible_p (srctype
, masktype
));
2178 if (nunits
== gather_off_nunits
)
2180 else if (nunits
== gather_off_nunits
/ 2)
2182 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
2185 for (i
= 0; i
< gather_off_nunits
; ++i
)
2186 sel
[i
] = i
| nunits
;
2188 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2190 else if (nunits
== gather_off_nunits
* 2)
2192 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
2195 for (i
= 0; i
< nunits
; ++i
)
2196 sel
[i
] = i
< gather_off_nunits
2197 ? i
: i
+ nunits
- gather_off_nunits
;
2199 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2201 for (i
= 0; i
< nunits
; ++i
)
2202 sel
[i
] = i
| gather_off_nunits
;
2203 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
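      /* Illustrative worked example (not from the original source) of the
	 selectors built above for the case nunits == gather_off_nunits * 2.
	 With nunits == 8 and gather_off_nunits == 4:

	   first loop:  sel[i] = i < 4 ? i : i + 8 - 4
			-> { 0, 1, 2, 3, 8, 9, 10, 11 }   (perm_mask)
	   second loop: sel[i] = i | 4
			-> { 4, 5, 6, 7, 4, 5, 6, 7 }     (mask_perm_mask)

	 i.e. the first selector picks the low half of each of its two
	 input vectors, while the second duplicates the high half of its
	 input, which is used for the odd-numbered copies.  */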
2208 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2210 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2211 if (!is_gimple_min_invariant (ptr
))
2213 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2214 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2215 gcc_assert (!new_bb
);
2218 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2220 prev_stmt_info
= NULL
;
2221 for (j
= 0; j
< ncopies
; ++j
)
2223 if (modifier
== WIDEN
&& (j
& 1))
2224 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2225 perm_mask
, stmt
, gsi
);
2228 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2231 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2233 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2235 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2236 == TYPE_VECTOR_SUBPARTS (idxtype
));
2237 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2238 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2240 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2241 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2245 if (mask_perm_mask
&& (j
& 1))
2246 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2247 mask_perm_mask
, stmt
, gsi
);
2251 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2254 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2255 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2259 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2261 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2262 == TYPE_VECTOR_SUBPARTS (masktype
));
2263 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2264 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2266 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2267 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2273 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2276 if (!useless_type_conversion_p (vectype
, rettype
))
2278 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2279 == TYPE_VECTOR_SUBPARTS (rettype
));
2280 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2281 gimple_call_set_lhs (new_stmt
, op
);
2282 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2283 var
= make_ssa_name (vec_dest
);
2284 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2285 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2289 var
= make_ssa_name (vec_dest
, new_stmt
);
2290 gimple_call_set_lhs (new_stmt
, var
);
2293 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2295 if (modifier
== NARROW
)
2302 var
= permute_vec_elements (prev_res
, var
,
2303 perm_mask
, stmt
, gsi
);
2304 new_stmt
= SSA_NAME_DEF_STMT (var
);
2307 if (prev_stmt_info
== NULL
)
2308 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2310 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2311 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2314 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2316 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2318 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2319 stmt_info
= vinfo_for_stmt (stmt
);
2321 tree lhs
= gimple_call_lhs (stmt
);
2322 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2323 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2324 set_vinfo_for_stmt (stmt
, NULL
);
2325 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2326 gsi_replace (gsi
, new_stmt
, true);
2329 else if (vls_type
!= VLS_LOAD
)
2331 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2332 prev_stmt_info
= NULL
;
2333 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2334 for (i
= 0; i
< ncopies
; i
++)
2336 unsigned align
, misalign
;
2340 tree rhs
= gimple_call_arg (stmt
, 3);
2341 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2342 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
	      /* We should have caught mismatched types earlier.  */
2344 gcc_assert (useless_type_conversion_p (vectype
,
2345 TREE_TYPE (vec_rhs
)));
2346 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2347 NULL_TREE
, &dummy
, gsi
,
2348 &ptr_incr
, false, &inv_p
);
2349 gcc_assert (!inv_p
);
2353 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2354 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2355 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2356 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2357 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2358 TYPE_SIZE_UNIT (vectype
));
2361 align
= TYPE_ALIGN_UNIT (vectype
);
2362 if (aligned_access_p (dr
))
2364 else if (DR_MISALIGNMENT (dr
) == -1)
2366 align
= TYPE_ALIGN_UNIT (elem_type
);
2370 misalign
= DR_MISALIGNMENT (dr
);
2371 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2373 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2374 misalign
? least_bit_hwi (misalign
) : align
);
2376 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2377 ptr
, vec_mask
, vec_rhs
);
2378 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2380 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2382 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2383 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2388 tree vec_mask
= NULL_TREE
;
2389 prev_stmt_info
= NULL
;
2390 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2391 for (i
= 0; i
< ncopies
; i
++)
2393 unsigned align
, misalign
;
2397 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2398 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2399 NULL_TREE
, &dummy
, gsi
,
2400 &ptr_incr
, false, &inv_p
);
2401 gcc_assert (!inv_p
);
2405 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2406 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2407 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2408 TYPE_SIZE_UNIT (vectype
));
2411 align
= TYPE_ALIGN_UNIT (vectype
);
2412 if (aligned_access_p (dr
))
2414 else if (DR_MISALIGNMENT (dr
) == -1)
2416 align
= TYPE_ALIGN_UNIT (elem_type
);
2420 misalign
= DR_MISALIGNMENT (dr
);
2421 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2423 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2424 misalign
? least_bit_hwi (misalign
) : align
);
2426 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2428 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2429 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2431 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2433 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2434 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2438 if (vls_type
== VLS_LOAD
)
2440 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2442 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2444 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2445 stmt_info
= vinfo_for_stmt (stmt
);
2447 tree lhs
= gimple_call_lhs (stmt
);
2448 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2449 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2450 set_vinfo_for_stmt (stmt
, NULL
);
2451 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2452 gsi_replace (gsi
, new_stmt
, true);
2458 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2461 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2462 gimple
**vec_stmt
, slp_tree slp_node
,
2463 tree vectype_in
, enum vect_def_type
*dt
)
2466 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2467 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2468 unsigned ncopies
, nunits
;
2470 op
= gimple_call_arg (stmt
, 0);
2471 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2472 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2480 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2482 gcc_assert (ncopies
>= 1);
2484 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2489 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype
));
2490 unsigned char *elt
= elts
;
2491 unsigned word_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
) / nunits
;
2492 for (unsigned i
= 0; i
< nunits
; ++i
)
2493 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2494 *elt
++ = (i
+ 1) * word_bytes
- j
- 1;
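  /* Illustrative worked example (not from the original source): for a
     bswap32 on a vector of four 32-bit elements, char_vectype has 16
     byte elements, so word_bytes == 4 and the selector built above is

       elts = { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

     i.e. the bytes of each 32-bit word are reversed in place by a single
     byte permutation of the vector viewed as chars.  */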
2496 if (! can_vec_perm_p (TYPE_MODE (char_vectype
), false, elts
))
2501 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2502 if (dump_enabled_p ())
2503 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2505 if (! PURE_SLP_STMT (stmt_info
))
2507 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2508 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2509 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2510 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2515 tree
*telts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (char_vectype
));
2516 for (unsigned i
= 0; i
< TYPE_VECTOR_SUBPARTS (char_vectype
); ++i
)
2517 telts
[i
] = build_int_cst (char_type_node
, elts
[i
]);
2518 tree bswap_vconst
= build_vector (char_vectype
, telts
);
2521 vec
<tree
> vec_oprnds
= vNULL
;
2522 gimple
*new_stmt
= NULL
;
2523 stmt_vec_info prev_stmt_info
= NULL
;
2524 for (unsigned j
= 0; j
< ncopies
; j
++)
2528 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2530 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
      /* Arguments are ready.  Create the new vector stmt.  */
2535 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2537 tree tem
= make_ssa_name (char_vectype
);
2538 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2539 char_vectype
, vop
));
2540 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2541 tree tem2
= make_ssa_name (char_vectype
);
2542 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2543 tem
, tem
, bswap_vconst
);
2544 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2545 tem
= make_ssa_name (vectype
);
2546 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2548 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2550 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2557 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2561 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2564 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
2577 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2578 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2582 int multi_step_cvt
= 0;
2583 auto_vec
<tree
, 8> interm_types
;
2584 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2585 &code
, &multi_step_cvt
,
2590 *convert_code
= code
;
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
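/* Illustrative example (not from the original source): a call that this
   function can vectorize, e.g. the hypothetical loop

     for (int i = 0; i < n; ++i)
       a[i] = sqrtf (b[i]);

   where the scalar sqrtf call is replaced either by an internal
   function that maps to a vector instruction or by a target-provided
   vectorized built-in, one vector call per copy.  */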
2602 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2609 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2610 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2611 tree vectype_out
, vectype_in
;
2614 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2615 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2616 vec_info
*vinfo
= stmt_info
->vinfo
;
2617 tree fndecl
, new_temp
, rhs_type
;
2619 enum vect_def_type dt
[3]
2620 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2622 gimple
*new_stmt
= NULL
;
2624 vec
<tree
> vargs
= vNULL
;
2625 enum { NARROW
, NONE
, WIDEN
} modifier
;
2629 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2632 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2636 /* Is GS a vectorizable call? */
2637 stmt
= dyn_cast
<gcall
*> (gs
);
2641 if (gimple_call_internal_p (stmt
)
2642 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2643 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2644 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2647 if (gimple_call_lhs (stmt
) == NULL_TREE
2648 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2651 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2653 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2655 /* Process function arguments. */
2656 rhs_type
= NULL_TREE
;
2657 vectype_in
= NULL_TREE
;
2658 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
2663 if (nargs
== 0 || nargs
> 3)
2666 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2667 if (gimple_call_internal_p (stmt
)
2668 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2671 rhs_type
= unsigned_type_node
;
2674 for (i
= 0; i
< nargs
; i
++)
2678 op
= gimple_call_arg (stmt
, i
);
2680 /* We can only handle calls with arguments of the same type. */
2682 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2686 "argument types differ.\n");
2690 rhs_type
= TREE_TYPE (op
);
2692 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2694 if (dump_enabled_p ())
2695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2696 "use not simple.\n");
2701 vectype_in
= opvectype
;
2703 && opvectype
!= vectype_in
)
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2707 "argument vector types differ.\n");
2711 /* If all arguments are external or constant defs use a vector type with
2712 the same size as the output vector type. */
2714 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2716 gcc_assert (vectype_in
);
2719 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2722 "no vectype for scalar type ");
2723 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2724 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2731 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2732 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2733 if (nunits_in
== nunits_out
/ 2)
2735 else if (nunits_out
== nunits_in
)
2737 else if (nunits_out
== nunits_in
/ 2)
2742 /* We only handle functions that do not read or clobber memory. */
2743 if (gimple_vuse (stmt
))
2745 if (dump_enabled_p ())
2746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2747 "function reads from or writes to memory.\n");
2751 /* For now, we only vectorize functions if a target specific builtin
2752 is available. TODO -- in some cases, it might be profitable to
2753 insert the calls for pieces of the vector, in order to be able
2754 to vectorize other operations in the loop. */
2756 internal_fn ifn
= IFN_LAST
;
2757 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2758 tree callee
= gimple_call_fndecl (stmt
);
2760 /* First try using an internal function. */
2761 tree_code convert_code
= ERROR_MARK
;
2763 && (modifier
== NONE
2764 || (modifier
== NARROW
2765 && simple_integer_narrowing (vectype_out
, vectype_in
,
2767 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2770 /* If that fails, try asking for a target-specific built-in function. */
2771 if (ifn
== IFN_LAST
)
2773 if (cfn
!= CFN_LAST
)
2774 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2775 (cfn
, vectype_out
, vectype_in
);
2777 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2778 (callee
, vectype_out
, vectype_in
);
2781 if (ifn
== IFN_LAST
&& !fndecl
)
2783 if (cfn
== CFN_GOMP_SIMD_LANE
2786 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2787 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2788 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2789 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2791 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2792 { 0, 1, 2, ... vf - 1 } vector. */
2793 gcc_assert (nargs
== 0);
2795 else if (modifier
== NONE
2796 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2797 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2798 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2799 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2803 if (dump_enabled_p ())
2804 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2805 "function is not vectorizable.\n");
2812 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2813 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2815 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2817 /* Sanity check: make sure that at least one copy of the vectorized stmt
2818 needs to be generated. */
2819 gcc_assert (ncopies
>= 1);
2821 if (!vec_stmt
) /* transformation not required. */
2823 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2824 if (dump_enabled_p ())
2825 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2827 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2828 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2829 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2830 vec_promote_demote
, stmt_info
, 0, vect_body
);
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2841 scalar_dest
= gimple_call_lhs (stmt
);
2842 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2844 prev_stmt_info
= NULL
;
2845 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2847 tree prev_res
= NULL_TREE
;
2848 for (j
= 0; j
< ncopies
; ++j
)
2850 /* Build argument list for the vectorized call. */
2852 vargs
.create (nargs
);
2858 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2859 vec
<tree
> vec_oprnds0
;
2861 for (i
= 0; i
< nargs
; i
++)
2862 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2863 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2864 vec_oprnds0
= vec_defs
[0];
2866 /* Arguments are ready. Create the new vector stmt. */
2867 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2870 for (k
= 0; k
< nargs
; k
++)
2872 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2873 vargs
[k
] = vec_oprndsk
[i
];
2875 if (modifier
== NARROW
)
2877 tree half_res
= make_ssa_name (vectype_in
);
2878 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2879 gimple_call_set_lhs (new_stmt
, half_res
);
2880 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2883 prev_res
= half_res
;
2886 new_temp
= make_ssa_name (vec_dest
);
2887 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2888 prev_res
, half_res
);
2892 if (ifn
!= IFN_LAST
)
2893 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2895 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2896 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2897 gimple_call_set_lhs (new_stmt
, new_temp
);
2899 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2900 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2903 for (i
= 0; i
< nargs
; i
++)
2905 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2906 vec_oprndsi
.release ();
2911 for (i
= 0; i
< nargs
; i
++)
2913 op
= gimple_call_arg (stmt
, i
);
2916 = vect_get_vec_def_for_operand (op
, stmt
);
2919 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2921 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2924 vargs
.quick_push (vec_oprnd0
);
2927 if (gimple_call_internal_p (stmt
)
2928 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2930 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2932 for (k
= 0; k
< nunits_out
; ++k
)
2933 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2934 tree cst
= build_vector (vectype_out
, v
);
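	      /* Illustrative worked example (not from the original
		 source): with a vectorization factor of 4
		 (nunits_out == 4), the constant vectors built above
		 for IFN_GOMP_SIMD_LANE are

		   j == 0:  { 0, 1, 2, 3 }
		   j == 1:  { 4, 5, 6, 7 }

		 i.e. each copy J provides the per-lane indices
		 J * nunits_out + K.  */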
2936 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2937 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2938 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2939 new_temp
= make_ssa_name (vec_dest
);
2940 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2942 else if (modifier
== NARROW
)
2944 tree half_res
= make_ssa_name (vectype_in
);
2945 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2946 gimple_call_set_lhs (new_stmt
, half_res
);
2947 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2950 prev_res
= half_res
;
2953 new_temp
= make_ssa_name (vec_dest
);
2954 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2955 prev_res
, half_res
);
2959 if (ifn
!= IFN_LAST
)
2960 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2962 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2963 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2964 gimple_call_set_lhs (new_stmt
, new_temp
);
2966 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2968 if (j
== (modifier
== NARROW
? 1 : 0))
2969 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2971 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2973 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2976 else if (modifier
== NARROW
)
2978 for (j
= 0; j
< ncopies
; ++j
)
2980 /* Build argument list for the vectorized call. */
2982 vargs
.create (nargs
* 2);
2988 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2989 vec
<tree
> vec_oprnds0
;
2991 for (i
= 0; i
< nargs
; i
++)
2992 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2993 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2994 vec_oprnds0
= vec_defs
[0];
2996 /* Arguments are ready. Create the new vector stmt. */
2997 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3001 for (k
= 0; k
< nargs
; k
++)
3003 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3004 vargs
.quick_push (vec_oprndsk
[i
]);
3005 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3007 if (ifn
!= IFN_LAST
)
3008 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
3010 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3011 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3012 gimple_call_set_lhs (new_stmt
, new_temp
);
3013 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3014 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3017 for (i
= 0; i
< nargs
; i
++)
3019 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3020 vec_oprndsi
.release ();
3025 for (i
= 0; i
< nargs
; i
++)
3027 op
= gimple_call_arg (stmt
, i
);
3031 = vect_get_vec_def_for_operand (op
, stmt
);
3033 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3037 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3039 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3041 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3044 vargs
.quick_push (vec_oprnd0
);
3045 vargs
.quick_push (vec_oprnd1
);
3048 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3049 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3050 gimple_call_set_lhs (new_stmt
, new_temp
);
3051 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3054 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3056 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3058 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3061 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3064 /* No current target implements this case. */
3069 /* The call in STMT might prevent it from being removed in dce.
3070 We however cannot remove it here, due to the way the ssa name
3071 it defines is mapped to the new definition. So just replace
3072 rhs of the statement with something harmless. */
3077 type
= TREE_TYPE (scalar_dest
);
3078 if (is_pattern_stmt_p (stmt_info
))
3079 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3081 lhs
= gimple_call_lhs (stmt
);
3083 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3084 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3085 set_vinfo_for_stmt (stmt
, NULL
);
3086 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3087 gsi_replace (gsi
, new_stmt
, false);
3093 struct simd_call_arg_info
3097 HOST_WIDE_INT linear_step
;
3098 enum vect_def_type dt
;
3100 bool simd_lane_linear
;
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

vect_simd_lane_linear (tree op, struct loop *loop,
		       struct simd_call_arg_info *arginfo)
3111 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3113 if (!is_gimple_assign (def_stmt
)
3114 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3115 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3118 tree base
= gimple_assign_rhs1 (def_stmt
);
3119 HOST_WIDE_INT linear_step
= 0;
3120 tree v
= gimple_assign_rhs2 (def_stmt
);
3121 while (TREE_CODE (v
) == SSA_NAME
)
3124 def_stmt
= SSA_NAME_DEF_STMT (v
);
3125 if (is_gimple_assign (def_stmt
))
3126 switch (gimple_assign_rhs_code (def_stmt
))
3129 t
= gimple_assign_rhs2 (def_stmt
);
3130 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3132 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3133 v
= gimple_assign_rhs1 (def_stmt
);
3136 t
= gimple_assign_rhs2 (def_stmt
);
3137 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3139 linear_step
= tree_to_shwi (t
);
3140 v
= gimple_assign_rhs1 (def_stmt
);
3143 t
= gimple_assign_rhs1 (def_stmt
);
3144 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3145 || (TYPE_PRECISION (TREE_TYPE (v
))
3146 < TYPE_PRECISION (TREE_TYPE (t
))))
3155 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3157 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3158 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3163 arginfo
->linear_step
= linear_step
;
3165 arginfo
->simd_lane_linear
= true;
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
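/* Illustrative example (not from the original source): a simd clone
   produced from a hypothetical declaration such as

     #pragma omp declare simd simdlen(4) uniform(scale) linear(i)
     float f (float x, float scale, int i);

   gives the vectorizer a clone taking a vector of four X values plus
   the scalar SCALE and the linear I, so calls to f inside a vectorized
   loop can be replaced by one call to the clone per group of four
   iterations.  */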
3180 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3181 gimple
**vec_stmt
, slp_tree slp_node
)
3186 tree vec_oprnd0
= NULL_TREE
;
3187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3189 unsigned int nunits
;
3190 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3191 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3192 vec_info
*vinfo
= stmt_info
->vinfo
;
3193 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3194 tree fndecl
, new_temp
;
3196 gimple
*new_stmt
= NULL
;
3198 auto_vec
<simd_call_arg_info
> arginfo
;
3199 vec
<tree
> vargs
= vNULL
;
3201 tree lhs
, rtype
, ratype
;
3202 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
3204 /* Is STMT a vectorizable call? */
3205 if (!is_gimple_call (stmt
))
3208 fndecl
= gimple_call_fndecl (stmt
);
3209 if (fndecl
== NULL_TREE
)
3212 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3213 if (node
== NULL
|| node
->simd_clones
== NULL
)
3216 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3219 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3223 if (gimple_call_lhs (stmt
)
3224 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3227 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3229 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3231 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3238 /* Process function arguments. */
3239 nargs
= gimple_call_num_args (stmt
);
3241 /* Bail out if the function has zero arguments. */
3245 arginfo
.reserve (nargs
, true);
3247 for (i
= 0; i
< nargs
; i
++)
3249 simd_call_arg_info thisarginfo
;
3252 thisarginfo
.linear_step
= 0;
3253 thisarginfo
.align
= 0;
3254 thisarginfo
.op
= NULL_TREE
;
3255 thisarginfo
.simd_lane_linear
= false;
3257 op
= gimple_call_arg (stmt
, i
);
3258 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3259 &thisarginfo
.vectype
)
3260 || thisarginfo
.dt
== vect_uninitialized_def
)
3262 if (dump_enabled_p ())
3263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3264 "use not simple.\n");
3268 if (thisarginfo
.dt
== vect_constant_def
3269 || thisarginfo
.dt
== vect_external_def
)
3270 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3272 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3274 /* For linear arguments, the analyze phase should have saved
3275 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3276 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3277 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3279 gcc_assert (vec_stmt
);
3280 thisarginfo
.linear_step
3281 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3283 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3284 thisarginfo
.simd_lane_linear
3285 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3286 == boolean_true_node
);
3287 /* If loop has been peeled for alignment, we need to adjust it. */
3288 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3289 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3290 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3292 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3293 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3294 tree opt
= TREE_TYPE (thisarginfo
.op
);
3295 bias
= fold_convert (TREE_TYPE (step
), bias
);
3296 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3298 = fold_build2 (POINTER_TYPE_P (opt
)
3299 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3300 thisarginfo
.op
, bias
);
3304 && thisarginfo
.dt
!= vect_constant_def
3305 && thisarginfo
.dt
!= vect_external_def
3307 && TREE_CODE (op
) == SSA_NAME
3308 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3310 && tree_fits_shwi_p (iv
.step
))
3312 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3313 thisarginfo
.op
= iv
.base
;
3315 else if ((thisarginfo
.dt
== vect_constant_def
3316 || thisarginfo
.dt
== vect_external_def
)
3317 && POINTER_TYPE_P (TREE_TYPE (op
)))
3318 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3319 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3321 if (POINTER_TYPE_P (TREE_TYPE (op
))
3322 && !thisarginfo
.linear_step
3324 && thisarginfo
.dt
!= vect_constant_def
3325 && thisarginfo
.dt
!= vect_external_def
3328 && TREE_CODE (op
) == SSA_NAME
)
3329 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3331 arginfo
.quick_push (thisarginfo
);
3334 unsigned int badness
= 0;
3335 struct cgraph_node
*bestn
= NULL
;
3336 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3337 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3339 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3340 n
= n
->simdclone
->next_clone
)
3342 unsigned int this_badness
= 0;
3343 if (n
->simdclone
->simdlen
3344 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3345 || n
->simdclone
->nargs
!= nargs
)
3347 if (n
->simdclone
->simdlen
3348 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3349 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3350 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3351 if (n
->simdclone
->inbranch
)
3352 this_badness
+= 2048;
3353 int target_badness
= targetm
.simd_clone
.usable (n
);
3354 if (target_badness
< 0)
3356 this_badness
+= target_badness
* 512;
3357 /* FORNOW: Have to add code to add the mask argument. */
3358 if (n
->simdclone
->inbranch
)
3360 for (i
= 0; i
< nargs
; i
++)
3362 switch (n
->simdclone
->args
[i
].arg_type
)
3364 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3365 if (!useless_type_conversion_p
3366 (n
->simdclone
->args
[i
].orig_type
,
3367 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3369 else if (arginfo
[i
].dt
== vect_constant_def
3370 || arginfo
[i
].dt
== vect_external_def
3371 || arginfo
[i
].linear_step
)
3374 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3375 if (arginfo
[i
].dt
!= vect_constant_def
3376 && arginfo
[i
].dt
!= vect_external_def
)
3379 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3380 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3381 if (arginfo
[i
].dt
== vect_constant_def
3382 || arginfo
[i
].dt
== vect_external_def
3383 || (arginfo
[i
].linear_step
3384 != n
->simdclone
->args
[i
].linear_step
))
3387 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3388 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3389 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3390 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3391 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3392 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3396 case SIMD_CLONE_ARG_TYPE_MASK
:
3399 if (i
== (size_t) -1)
3401 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3406 if (arginfo
[i
].align
)
3407 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3408 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3410 if (i
== (size_t) -1)
3412 if (bestn
== NULL
|| this_badness
< badness
)
3415 badness
= this_badness
;
3422 for (i
= 0; i
< nargs
; i
++)
3423 if ((arginfo
[i
].dt
== vect_constant_def
3424 || arginfo
[i
].dt
== vect_external_def
)
3425 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3428 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3430 if (arginfo
[i
].vectype
== NULL
3431 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3432 > bestn
->simdclone
->simdlen
))
3436 fndecl
= bestn
->decl
;
3437 nunits
= bestn
->simdclone
->simdlen
;
3438 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3440 /* If the function isn't const, only allow it in simd loops where user
3441 has asserted that at least nunits consecutive iterations can be
3442 performed using SIMD instructions. */
3443 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3444 && gimple_vuse (stmt
))
3447 /* Sanity check: make sure that at least one copy of the vectorized stmt
3448 needs to be generated. */
3449 gcc_assert (ncopies
>= 1);
3451 if (!vec_stmt
) /* transformation not required. */
3453 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3454 for (i
= 0; i
< nargs
; i
++)
3455 if ((bestn
->simdclone
->args
[i
].arg_type
3456 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3457 || (bestn
->simdclone
->args
[i
].arg_type
3458 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3460 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3462 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3463 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3464 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3465 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3466 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3467 tree sll
= arginfo
[i
].simd_lane_linear
3468 ? boolean_true_node
: boolean_false_node
;
3469 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3471 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3472 if (dump_enabled_p ())
3473 dump_printf_loc (MSG_NOTE
, vect_location
,
3474 "=== vectorizable_simd_clone_call ===\n");
3475 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3481 if (dump_enabled_p ())
3482 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3485 scalar_dest
= gimple_call_lhs (stmt
);
3486 vec_dest
= NULL_TREE
;
3491 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3492 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3493 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3496 rtype
= TREE_TYPE (ratype
);
3500 prev_stmt_info
= NULL
;
3501 for (j
= 0; j
< ncopies
; ++j
)
3503 /* Build argument list for the vectorized call. */
3505 vargs
.create (nargs
);
3509 for (i
= 0; i
< nargs
; i
++)
3511 unsigned int k
, l
, m
, o
;
3513 op
= gimple_call_arg (stmt
, i
);
3514 switch (bestn
->simdclone
->args
[i
].arg_type
)
3516 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3517 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3518 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3519 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3521 if (TYPE_VECTOR_SUBPARTS (atype
)
3522 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3524 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3525 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3526 / TYPE_VECTOR_SUBPARTS (atype
));
3527 gcc_assert ((k
& (k
- 1)) == 0);
3530 = vect_get_vec_def_for_operand (op
, stmt
);
3533 vec_oprnd0
= arginfo
[i
].op
;
3534 if ((m
& (k
- 1)) == 0)
3536 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3539 arginfo
[i
].op
= vec_oprnd0
;
3541 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3543 bitsize_int ((m
& (k
- 1)) * prec
));
3545 = gimple_build_assign (make_ssa_name (atype
),
3547 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3548 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3552 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3553 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3554 gcc_assert ((k
& (k
- 1)) == 0);
3555 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3557 vec_alloc (ctor_elts
, k
);
3560 for (l
= 0; l
< k
; l
++)
3562 if (m
== 0 && l
== 0)
3564 = vect_get_vec_def_for_operand (op
, stmt
);
3567 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3569 arginfo
[i
].op
= vec_oprnd0
;
3572 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3576 vargs
.safe_push (vec_oprnd0
);
3579 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3581 = gimple_build_assign (make_ssa_name (atype
),
3583 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3584 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3589 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3590 vargs
.safe_push (op
);
3592 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3593 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3598 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3603 edge pe
= loop_preheader_edge (loop
);
3604 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3605 gcc_assert (!new_bb
);
3607 if (arginfo
[i
].simd_lane_linear
)
3609 vargs
.safe_push (arginfo
[i
].op
);
3612 tree phi_res
= copy_ssa_name (op
);
3613 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3614 set_vinfo_for_stmt (new_phi
,
3615 new_stmt_vec_info (new_phi
, loop_vinfo
));
3616 add_phi_arg (new_phi
, arginfo
[i
].op
,
3617 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3619 = POINTER_TYPE_P (TREE_TYPE (op
))
3620 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3621 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3622 ? sizetype
: TREE_TYPE (op
);
3624 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3626 tree tcst
= wide_int_to_tree (type
, cst
);
3627 tree phi_arg
= copy_ssa_name (op
);
3629 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3630 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3631 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3632 set_vinfo_for_stmt (new_stmt
,
3633 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3634 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3636 arginfo
[i
].op
= phi_res
;
3637 vargs
.safe_push (phi_res
);
3642 = POINTER_TYPE_P (TREE_TYPE (op
))
3643 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3644 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3645 ? sizetype
: TREE_TYPE (op
);
3647 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3649 tree tcst
= wide_int_to_tree (type
, cst
);
3650 new_temp
= make_ssa_name (TREE_TYPE (op
));
3651 new_stmt
= gimple_build_assign (new_temp
, code
,
3652 arginfo
[i
].op
, tcst
);
3653 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3654 vargs
.safe_push (new_temp
);
3657 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3658 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3659 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3660 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3661 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3662 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3668 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3671 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3673 new_temp
= create_tmp_var (ratype
);
3674 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3675 == TYPE_VECTOR_SUBPARTS (rtype
))
3676 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3678 new_temp
= make_ssa_name (rtype
, new_stmt
);
3679 gimple_call_set_lhs (new_stmt
, new_temp
);
3681 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3685 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3688 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3689 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3690 gcc_assert ((k
& (k
- 1)) == 0);
3691 for (l
= 0; l
< k
; l
++)
3696 t
= build_fold_addr_expr (new_temp
);
3697 t
= build2 (MEM_REF
, vectype
, t
,
3698 build_int_cst (TREE_TYPE (t
),
3699 l
* prec
/ BITS_PER_UNIT
));
3702 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3703 size_int (prec
), bitsize_int (l
* prec
));
3705 = gimple_build_assign (make_ssa_name (vectype
), t
);
3706 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3707 if (j
== 0 && l
== 0)
3708 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3710 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3712 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3717 tree clobber
= build_constructor (ratype
, NULL
);
3718 TREE_THIS_VOLATILE (clobber
) = 1;
3719 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3720 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3724 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3726 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3727 / TYPE_VECTOR_SUBPARTS (rtype
));
3728 gcc_assert ((k
& (k
- 1)) == 0);
3729 if ((j
& (k
- 1)) == 0)
3730 vec_alloc (ret_ctor_elts
, k
);
3733 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3734 for (m
= 0; m
< o
; m
++)
3736 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3737 size_int (m
), NULL_TREE
, NULL_TREE
);
3739 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3740 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3741 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3742 gimple_assign_lhs (new_stmt
));
3744 tree clobber
= build_constructor (ratype
, NULL
);
3745 TREE_THIS_VOLATILE (clobber
) = 1;
3746 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3747 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3750 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3751 if ((j
& (k
- 1)) != k
- 1)
3753 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3755 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3756 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3758 if ((unsigned) j
== k
- 1)
3759 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3761 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3763 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3768 tree t
= build_fold_addr_expr (new_temp
);
3769 t
= build2 (MEM_REF
, vectype
, t
,
3770 build_int_cst (TREE_TYPE (t
), 0));
3772 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3773 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3774 tree clobber
= build_constructor (ratype
, NULL
);
3775 TREE_THIS_VOLATILE (clobber
) = 1;
3776 vect_finish_stmt_generation (stmt
,
3777 gimple_build_assign (new_temp
,
3783 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3785 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3787 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3792 /* The call in STMT might prevent it from being removed in dce.
3793 We however cannot remove it here, due to the way the ssa name
3794 it defines is mapped to the new definition. So just replace
3795 rhs of the statement with something harmless. */
3802 type
= TREE_TYPE (scalar_dest
);
3803 if (is_pattern_stmt_p (stmt_info
))
3804 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3806 lhs
= gimple_call_lhs (stmt
);
3807 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3810 new_stmt
= gimple_build_nop ();
3811 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3812 set_vinfo_for_stmt (stmt
, NULL
);
3813 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3814 gsi_replace (gsi
, new_stmt
, true);
3815 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
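/* Illustrative example (not from the original source): for a widening
   multiply of two vectors of eight 16-bit elements into 32-bit
   products, the result no longer fits in one vector, so it is produced
   in two halves, e.g. with VEC_WIDEN_MULT_LO_EXPR for one half of the
   elements and VEC_WIDEN_MULT_HI_EXPR for the other; this helper emits
   one such half at a time.  */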
3831 vect_gen_widened_results_half (enum tree_code code
,
3833 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3834 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3840 /* Generate half of the widened result: */
3841 if (code
== CALL_EXPR
)
3843 /* Target specific support */
3844 if (op_type
== binary_op
)
3845 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3847 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3848 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3849 gimple_call_set_lhs (new_stmt
, new_temp
);
3853 /* Generic support */
3854 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3855 if (op_type
!= binary_op
)
3857 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3858 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3859 gimple_assign_set_lhs (new_stmt
, new_temp
);
3861 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
3875 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3876 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3880 /* Get first vector operand. */
3881 /* All the vector operands except the very first one (that is scalar oprnd)
3883 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3884 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3886 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3888 vec_oprnds
->quick_push (vec_oprnd
);
3890 /* Get second vector operand. */
3891 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3892 vec_oprnds
->quick_push (vec_oprnd
);
3896 /* For conversion in multiple steps, continue to get operands
3899 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
3908 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3909 int multi_step_cvt
, gimple
*stmt
,
3911 gimple_stmt_iterator
*gsi
,
3912 slp_tree slp_node
, enum tree_code code
,
3913 stmt_vec_info
*prev_stmt_info
)
3916 tree vop0
, vop1
, new_tmp
, vec_dest
;
3918 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3920 vec_dest
= vec_dsts
.pop ();
3922 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3924 /* Create demotion operation. */
3925 vop0
= (*vec_oprnds
)[i
];
3926 vop1
= (*vec_oprnds
)[i
+ 1];
3927 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3928 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3929 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3930 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3933 /* Store the resulting vector for next recursive call. */
3934 (*vec_oprnds
)[i
/2] = new_tmp
;
3937 /* This is the last step of the conversion sequence. Store the
3938 vectors in SLP_NODE or in vector info of the scalar statement
3939 (or in STMT_VINFO_RELATED_STMT chain). */
3941 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3944 if (!*prev_stmt_info
)
3945 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3947 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3949 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3954 /* For multi-step demotion operations we first generate demotion operations
3955 from the source type to the intermediate types, and then combine the
3956 results (stored in VEC_OPRNDS) in demotion operation to the destination
3960 /* At each level of recursion we have half of the operands we had at the
3962 vec_oprnds
->truncate ((i
+1)/2);
3963 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3964 stmt
, vec_dsts
, gsi
, slp_node
,
3965 VEC_PACK_TRUNC_EXPR
,
3969 vec_dsts
.quick_push (vec_dest
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */
3978 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3979 vec
<tree
> *vec_oprnds1
,
3980 gimple
*stmt
, tree vec_dest
,
3981 gimple_stmt_iterator
*gsi
,
3982 enum tree_code code1
,
3983 enum tree_code code2
, tree decl1
,
3984 tree decl2
, int op_type
)
3987 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3988 gimple
*new_stmt1
, *new_stmt2
;
3989 vec
<tree
> vec_tmp
= vNULL
;
3991 vec_tmp
.create (vec_oprnds0
->length () * 2);
3992 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3994 if (op_type
== binary_op
)
3995 vop1
= (*vec_oprnds1
)[i
];
3999 /* Generate the two halves of promotion operation. */
4000 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4001 op_type
, vec_dest
, gsi
, stmt
);
4002 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4003 op_type
, vec_dest
, gsi
, stmt
);
4004 if (is_gimple_call (new_stmt1
))
4006 new_tmp1
= gimple_call_lhs (new_stmt1
);
4007 new_tmp2
= gimple_call_lhs (new_stmt2
);
4011 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4012 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4015 /* Store the results for the next step. */
4016 vec_tmp
.quick_push (new_tmp1
);
4017 vec_tmp
.quick_push (new_tmp2
);
4020 vec_oprnds0
->release ();
4021 *vec_oprnds0
= vec_tmp
;
4025 /* Check if STMT performs a conversion operation, that can be vectorized.
4026 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4027 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4028 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4031 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4032 gimple
**vec_stmt
, slp_tree slp_node
)
4036 tree op0
, op1
= NULL_TREE
;
4037 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4038 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4039 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4040 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4041 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4042 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4045 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4047 gimple
*new_stmt
= NULL
;
4048 stmt_vec_info prev_stmt_info
;
4051 tree vectype_out
, vectype_in
;
4053 tree lhs_type
, rhs_type
;
4054 enum { NARROW
, NONE
, WIDEN
} modifier
;
4055 vec
<tree
> vec_oprnds0
= vNULL
;
4056 vec
<tree
> vec_oprnds1
= vNULL
;
4058 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4059 vec_info
*vinfo
= stmt_info
->vinfo
;
4060 int multi_step_cvt
= 0;
4061 vec
<tree
> interm_types
= vNULL
;
4062 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4064 machine_mode rhs_mode
;
4065 unsigned short fltsz
;
4067 /* Is STMT a vectorizable conversion? */
4069 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4072 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4076 if (!is_gimple_assign (stmt
))
4079 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4082 code
= gimple_assign_rhs_code (stmt
);
4083 if (!CONVERT_EXPR_CODE_P (code
)
4084 && code
!= FIX_TRUNC_EXPR
4085 && code
!= FLOAT_EXPR
4086 && code
!= WIDEN_MULT_EXPR
4087 && code
!= WIDEN_LSHIFT_EXPR
)
4090 op_type
= TREE_CODE_LENGTH (code
);
4092 /* Check types of lhs and rhs. */
4093 scalar_dest
= gimple_assign_lhs (stmt
);
4094 lhs_type
= TREE_TYPE (scalar_dest
);
4095 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4097 op0
= gimple_assign_rhs1 (stmt
);
4098 rhs_type
= TREE_TYPE (op0
);
4100 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4101 && !((INTEGRAL_TYPE_P (lhs_type
)
4102 && INTEGRAL_TYPE_P (rhs_type
))
4103 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4104 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4107 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4108 && ((INTEGRAL_TYPE_P (lhs_type
)
4109 && (TYPE_PRECISION (lhs_type
)
4110 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
4111 || (INTEGRAL_TYPE_P (rhs_type
)
4112 && (TYPE_PRECISION (rhs_type
)
4113 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
4115 if (dump_enabled_p ())
4116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4117 "type conversion to/from bit-precision unsupported."
4122 /* Check the operands of the operation. */
4123 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4125 if (dump_enabled_p ())
4126 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4127 "use not simple.\n");
4130 if (op_type
== binary_op
)
4134 op1
= gimple_assign_rhs2 (stmt
);
4135 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4136 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4138 if (CONSTANT_CLASS_P (op0
))
4139 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4141 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4147 "use not simple.\n");
4152 /* If op0 is an external or constant defs use a vector type of
4153 the same size as the output vector type. */
4155 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4157 gcc_assert (vectype_in
);
4160 if (dump_enabled_p ())
4162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4163 "no vectype for scalar type ");
4164 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4165 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4171 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4172 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4174 if (dump_enabled_p ())
4176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4177 "can't convert between boolean and non "
4179 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4180 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4186 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4187 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4188 if (nunits_in
< nunits_out
)
4190 else if (nunits_out
== nunits_in
)
4195 /* Multiple types in SLP are handled by creating the appropriate number of
4196 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4200 else if (modifier
== NARROW
)
4201 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
4203 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4205 /* Sanity check: make sure that at least one copy of the vectorized stmt
4206 needs to be generated. */
4207 gcc_assert (ncopies
>= 1);
4209 /* Supportable by target? */
4213 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4215 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4220 if (dump_enabled_p ())
4221 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4222 "conversion not supported by target.\n");
4226 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4227 &code1
, &code2
, &multi_step_cvt
,
4230 /* Binary widening operation can only be supported directly by the
4232 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4236 if (code
!= FLOAT_EXPR
4237 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4238 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4241 rhs_mode
= TYPE_MODE (rhs_type
);
4242 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
4243 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
4244 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
4245 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
4248 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4249 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4250 if (cvt_type
== NULL_TREE
)
4253 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4255 if (!supportable_convert_operation (code
, vectype_out
,
4256 cvt_type
, &decl1
, &codecvt1
))
4259 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4260 cvt_type
, &codecvt1
,
4261 &codecvt2
, &multi_step_cvt
,
4265 gcc_assert (multi_step_cvt
== 0);
4267 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4268 vectype_in
, &code1
, &code2
,
4269 &multi_step_cvt
, &interm_types
))
4273 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
4276 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4277 codecvt2
= ERROR_MARK
;
4281 interm_types
.safe_push (cvt_type
);
4282 cvt_type
= NULL_TREE
;
4287 gcc_assert (op_type
== unary_op
);
4288 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4289 &code1
, &multi_step_cvt
,
4293 if (code
!= FIX_TRUNC_EXPR
4294 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4295 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4298 rhs_mode
= TYPE_MODE (rhs_type
);
4300 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4301 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4302 if (cvt_type
== NULL_TREE
)
4304 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4307 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4308 &code1
, &multi_step_cvt
,
4317 if (!vec_stmt
) /* transformation not required. */
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_NOTE
, vect_location
,
4321 "=== vectorizable_conversion ===\n");
4322 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4324 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4325 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4327 else if (modifier
== NARROW
)
4329 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4330 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4334 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4335 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4337 interm_types
.release ();
4342 if (dump_enabled_p ())
4343 dump_printf_loc (MSG_NOTE
, vect_location
,
4344 "transform conversion. ncopies = %d.\n", ncopies
);
4346 if (op_type
== binary_op
)
4348 if (CONSTANT_CLASS_P (op0
))
4349 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4350 else if (CONSTANT_CLASS_P (op1
))
4351 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4354 /* In case of multi-step conversion, we first generate conversion operations
4355 to the intermediate types, and then from that types to the final one.
4356 We create vector destinations for the intermediate type (TYPES) received
4357 from supportable_*_operation, and store them in the correct order
4358 for future use in vect_create_vectorized_*_stmts (). */
4359 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4360 vec_dest
= vect_create_destination_var (scalar_dest
,
4361 (cvt_type
&& modifier
== WIDEN
)
4362 ? cvt_type
: vectype_out
);
4363 vec_dsts
.quick_push (vec_dest
);
4367 for (i
= interm_types
.length () - 1;
4368 interm_types
.iterate (i
, &intermediate_type
); i
--)
4370 vec_dest
= vect_create_destination_var (scalar_dest
,
4372 vec_dsts
.quick_push (vec_dest
);
4377 vec_dest
= vect_create_destination_var (scalar_dest
,
4379 ? vectype_out
: cvt_type
);
4383 if (modifier
== WIDEN
)
4385 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4386 if (op_type
== binary_op
)
4387 vec_oprnds1
.create (1);
4389 else if (modifier
== NARROW
)
4390 vec_oprnds0
.create (
4391 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4393 else if (code
== WIDEN_LSHIFT_EXPR
)
4394 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4397 prev_stmt_info
= NULL
;
4401 for (j
= 0; j
< ncopies
; j
++)
4404 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
4407 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4409 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4411 /* Arguments are ready, create the new vector stmt. */
4412 if (code1
== CALL_EXPR
)
4414 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4415 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4416 gimple_call_set_lhs (new_stmt
, new_temp
);
4420 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4421 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4422 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4423 gimple_assign_set_lhs (new_stmt
, new_temp
);
4426 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4428 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4431 if (!prev_stmt_info
)
4432 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4434 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4435 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4442 /* In case the vectorization factor (VF) is bigger than the number
4443 of elements that we can fit in a vectype (nunits), we have to
4444 generate more than one vector stmt - i.e - we need to "unroll"
4445 the vector stmt by a factor VF/nunits. */
4446 for (j
= 0; j
< ncopies
; j
++)
4453 if (code
== WIDEN_LSHIFT_EXPR
)
4458 /* Store vec_oprnd1 for every vector stmt to be created
4459 for SLP_NODE. We check during the analysis that all
4460 the shift arguments are the same. */
4461 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4462 vec_oprnds1
.quick_push (vec_oprnd1
);
4464 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4468 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4469 &vec_oprnds1
, slp_node
, -1);
4473 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4474 vec_oprnds0
.quick_push (vec_oprnd0
);
4475 if (op_type
== binary_op
)
4477 if (code
== WIDEN_LSHIFT_EXPR
)
4480 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4481 vec_oprnds1
.quick_push (vec_oprnd1
);
4487 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4488 vec_oprnds0
.truncate (0);
4489 vec_oprnds0
.quick_push (vec_oprnd0
);
4490 if (op_type
== binary_op
)
4492 if (code
== WIDEN_LSHIFT_EXPR
)
4495 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4497 vec_oprnds1
.truncate (0);
4498 vec_oprnds1
.quick_push (vec_oprnd1
);
4502 /* Arguments are ready. Create the new vector stmts. */
4503 for (i
= multi_step_cvt
; i
>= 0; i
--)
4505 tree this_dest
= vec_dsts
[i
];
4506 enum tree_code c1
= code1
, c2
= code2
;
4507 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4512 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4514 stmt
, this_dest
, gsi
,
4515 c1
, c2
, decl1
, decl2
,
4519 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4523 if (codecvt1
== CALL_EXPR
)
4525 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4526 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4527 gimple_call_set_lhs (new_stmt
, new_temp
);
4531 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4532 new_temp
= make_ssa_name (vec_dest
);
4533 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4537 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4540 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4543 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4546 if (!prev_stmt_info
)
4547 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4549 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4550 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4555 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4559 /* In case the vectorization factor (VF) is bigger than the number
4560 of elements that we can fit in a vectype (nunits), we have to
4561 generate more than one vector stmt - i.e - we need to "unroll"
4562 the vector stmt by a factor VF/nunits. */
4563 for (j
= 0; j
< ncopies
; j
++)
4567 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4571 vec_oprnds0
.truncate (0);
4572 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4573 vect_pow2 (multi_step_cvt
) - 1);
4576 /* Arguments are ready. Create the new vector stmts. */
4578 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4580 if (codecvt1
== CALL_EXPR
)
4582 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4583 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4584 gimple_call_set_lhs (new_stmt
, new_temp
);
4588 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4589 new_temp
= make_ssa_name (vec_dest
);
4590 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4594 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4595 vec_oprnds0
[i
] = new_temp
;
4598 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4599 stmt
, vec_dsts
, gsi
,
4604 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4608 vec_oprnds0
.release ();
4609 vec_oprnds1
.release ();
4610 interm_types
.release ();
4616 /* Function vectorizable_assignment.
4618 Check if STMT performs an assignment (copy) that can be vectorized.
4619 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4620 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4621 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4624 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4625 gimple
**vec_stmt
, slp_tree slp_node
)
4630 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4631 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4634 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4638 vec
<tree
> vec_oprnds
= vNULL
;
4640 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4641 vec_info
*vinfo
= stmt_info
->vinfo
;
4642 gimple
*new_stmt
= NULL
;
4643 stmt_vec_info prev_stmt_info
= NULL
;
4644 enum tree_code code
;
4647 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4650 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4654 /* Is vectorizable assignment? */
4655 if (!is_gimple_assign (stmt
))
4658 scalar_dest
= gimple_assign_lhs (stmt
);
4659 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4662 code
= gimple_assign_rhs_code (stmt
);
4663 if (gimple_assign_single_p (stmt
)
4664 || code
== PAREN_EXPR
4665 || CONVERT_EXPR_CODE_P (code
))
4666 op
= gimple_assign_rhs1 (stmt
);
4670 if (code
== VIEW_CONVERT_EXPR
)
4671 op
= TREE_OPERAND (op
, 0);
4673 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4674 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4676 /* Multiple types in SLP are handled by creating the appropriate number of
4677 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4682 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4684 gcc_assert (ncopies
>= 1);
4686 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4690 "use not simple.\n");
4694 /* We can handle NOP_EXPR conversions that do not change the number
4695 of elements or the vector size. */
4696 if ((CONVERT_EXPR_CODE_P (code
)
4697 || code
== VIEW_CONVERT_EXPR
)
4699 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4700 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4701 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4704 /* We do not handle bit-precision changes. */
4705 if ((CONVERT_EXPR_CODE_P (code
)
4706 || code
== VIEW_CONVERT_EXPR
)
4707 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4708 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4709 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4710 || ((TYPE_PRECISION (TREE_TYPE (op
))
4711 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4712 /* But a conversion that does not change the bit-pattern is ok. */
4713 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4714 > TYPE_PRECISION (TREE_TYPE (op
)))
4715 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4716 /* Conversion between boolean types of different sizes is
4717 a simple assignment in case their vectypes are same
4719 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4720 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4724 "type conversion to/from bit-precision "
4729 if (!vec_stmt
) /* transformation not required. */
4731 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4732 if (dump_enabled_p ())
4733 dump_printf_loc (MSG_NOTE
, vect_location
,
4734 "=== vectorizable_assignment ===\n");
4735 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4744 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4747 for (j
= 0; j
< ncopies
; j
++)
4751 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4753 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4755 /* Arguments are ready. create the new vector stmt. */
4756 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4758 if (CONVERT_EXPR_CODE_P (code
)
4759 || code
== VIEW_CONVERT_EXPR
)
4760 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4761 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4762 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4763 gimple_assign_set_lhs (new_stmt
, new_temp
);
4764 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4766 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4773 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4775 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4777 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4780 vec_oprnds
.release ();
4785 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4786 either as shift by a scalar or by a vector. */
4789 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4792 machine_mode vec_mode
;
4797 vectype
= get_vectype_for_scalar_type (scalar_type
);
4801 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4803 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4805 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4807 || (optab_handler (optab
, TYPE_MODE (vectype
))
4808 == CODE_FOR_nothing
))
4812 vec_mode
= TYPE_MODE (vectype
);
4813 icode
= (int) optab_handler (optab
, vec_mode
);
4814 if (icode
== CODE_FOR_nothing
)
4821 /* Function vectorizable_shift.
4823 Check if STMT performs a shift operation that can be vectorized.
4824 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4825 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4826 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4829 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4830 gimple
**vec_stmt
, slp_tree slp_node
)
4834 tree op0
, op1
= NULL
;
4835 tree vec_oprnd1
= NULL_TREE
;
4836 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4838 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4839 enum tree_code code
;
4840 machine_mode vec_mode
;
4844 machine_mode optab_op2_mode
;
4846 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4848 gimple
*new_stmt
= NULL
;
4849 stmt_vec_info prev_stmt_info
;
4856 vec
<tree
> vec_oprnds0
= vNULL
;
4857 vec
<tree
> vec_oprnds1
= vNULL
;
4860 bool scalar_shift_arg
= true;
4861 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4862 vec_info
*vinfo
= stmt_info
->vinfo
;
4865 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4868 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4872 /* Is STMT a vectorizable binary/unary operation? */
4873 if (!is_gimple_assign (stmt
))
4876 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4879 code
= gimple_assign_rhs_code (stmt
);
4881 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4882 || code
== RROTATE_EXPR
))
4885 scalar_dest
= gimple_assign_lhs (stmt
);
4886 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4887 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4888 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4890 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4892 "bit-precision shifts not supported.\n");
4896 op0
= gimple_assign_rhs1 (stmt
);
4897 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4901 "use not simple.\n");
4904 /* If op0 is an external or constant def use a vector type with
4905 the same size as the output vector type. */
4907 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4909 gcc_assert (vectype
);
4912 if (dump_enabled_p ())
4913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4914 "no vectype for scalar type\n");
4918 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4919 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4920 if (nunits_out
!= nunits_in
)
4923 op1
= gimple_assign_rhs2 (stmt
);
4924 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4926 if (dump_enabled_p ())
4927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4928 "use not simple.\n");
4933 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4937 /* Multiple types in SLP are handled by creating the appropriate number of
4938 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4943 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4945 gcc_assert (ncopies
>= 1);
4947 /* Determine whether the shift amount is a vector, or scalar. If the
4948 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4950 if ((dt
[1] == vect_internal_def
4951 || dt
[1] == vect_induction_def
)
4953 scalar_shift_arg
= false;
4954 else if (dt
[1] == vect_constant_def
4955 || dt
[1] == vect_external_def
4956 || dt
[1] == vect_internal_def
)
4958 /* In SLP, need to check whether the shift count is the same,
4959 in loops if it is a constant or invariant, it is always
4963 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4966 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4967 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4968 scalar_shift_arg
= false;
4971 /* If the shift amount is computed by a pattern stmt we cannot
4972 use the scalar amount directly thus give up and use a vector
4974 if (dt
[1] == vect_internal_def
)
4976 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4977 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4978 scalar_shift_arg
= false;
4983 if (dump_enabled_p ())
4984 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4985 "operand mode requires invariant argument.\n");
4989 /* Vector shifted by vector. */
4990 if (!scalar_shift_arg
)
4992 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4993 if (dump_enabled_p ())
4994 dump_printf_loc (MSG_NOTE
, vect_location
,
4995 "vector/vector shift/rotate found.\n");
4998 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4999 if (op1_vectype
== NULL_TREE
5000 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5002 if (dump_enabled_p ())
5003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5004 "unusable type for last operand in"
5005 " vector/vector shift/rotate.\n");
5009 /* See if the machine has a vector shifted by scalar insn and if not
5010 then see if it has a vector shifted by vector insn. */
5013 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5015 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_NOTE
, vect_location
,
5019 "vector/scalar shift/rotate found.\n");
5023 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5025 && (optab_handler (optab
, TYPE_MODE (vectype
))
5026 != CODE_FOR_nothing
))
5028 scalar_shift_arg
= false;
5030 if (dump_enabled_p ())
5031 dump_printf_loc (MSG_NOTE
, vect_location
,
5032 "vector/vector shift/rotate found.\n");
5034 /* Unlike the other binary operators, shifts/rotates have
5035 the rhs being int, instead of the same type as the lhs,
5036 so make sure the scalar is the right type if we are
5037 dealing with vectors of long long/long/short/char. */
5038 if (dt
[1] == vect_constant_def
)
5039 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5040 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5044 && TYPE_MODE (TREE_TYPE (vectype
))
5045 != TYPE_MODE (TREE_TYPE (op1
)))
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5049 "unusable type for last operand in"
5050 " vector/vector shift/rotate.\n");
5053 if (vec_stmt
&& !slp_node
)
5055 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5056 op1
= vect_init_vector (stmt
, op1
,
5057 TREE_TYPE (vectype
), NULL
);
5064 /* Supportable by target? */
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5072 vec_mode
= TYPE_MODE (vectype
);
5073 icode
= (int) optab_handler (optab
, vec_mode
);
5074 if (icode
== CODE_FOR_nothing
)
5076 if (dump_enabled_p ())
5077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5078 "op not supported by target.\n");
5079 /* Check only during analysis. */
5080 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5081 || (vf
< vect_min_worthwhile_factor (code
)
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_NOTE
, vect_location
,
5086 "proceeding using word mode.\n");
5089 /* Worthwhile without SIMD support? Check only during analysis. */
5090 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
5091 && vf
< vect_min_worthwhile_factor (code
)
5094 if (dump_enabled_p ())
5095 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5096 "not worthwhile without SIMD support.\n");
5100 if (!vec_stmt
) /* transformation not required. */
5102 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_NOTE
, vect_location
,
5105 "=== vectorizable_shift ===\n");
5106 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5112 if (dump_enabled_p ())
5113 dump_printf_loc (MSG_NOTE
, vect_location
,
5114 "transform binary/unary operation.\n");
5117 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5119 prev_stmt_info
= NULL
;
5120 for (j
= 0; j
< ncopies
; j
++)
5125 if (scalar_shift_arg
)
5127 /* Vector shl and shr insn patterns can be defined with scalar
5128 operand 2 (shift operand). In this case, use constant or loop
5129 invariant op1 directly, without extending it to vector mode
5131 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5132 if (!VECTOR_MODE_P (optab_op2_mode
))
5134 if (dump_enabled_p ())
5135 dump_printf_loc (MSG_NOTE
, vect_location
,
5136 "operand 1 using scalar mode.\n");
5138 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5139 vec_oprnds1
.quick_push (vec_oprnd1
);
5142 /* Store vec_oprnd1 for every vector stmt to be created
5143 for SLP_NODE. We check during the analysis that all
5144 the shift arguments are the same.
5145 TODO: Allow different constants for different vector
5146 stmts generated for an SLP instance. */
5147 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5148 vec_oprnds1
.quick_push (vec_oprnd1
);
5153 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5154 (a special case for certain kind of vector shifts); otherwise,
5155 operand 1 should be of a vector type (the usual case). */
5157 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5160 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5164 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5166 /* Arguments are ready. Create the new vector stmt. */
5167 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5169 vop1
= vec_oprnds1
[i
];
5170 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5171 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5172 gimple_assign_set_lhs (new_stmt
, new_temp
);
5173 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5175 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5182 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5184 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5185 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5188 vec_oprnds0
.release ();
5189 vec_oprnds1
.release ();
5195 /* Function vectorizable_operation.
5197 Check if STMT performs a binary, unary or ternary operation that can
5199 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5200 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5201 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5204 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5205 gimple
**vec_stmt
, slp_tree slp_node
)
5209 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5210 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5212 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5213 enum tree_code code
;
5214 machine_mode vec_mode
;
5218 bool target_support_p
;
5220 enum vect_def_type dt
[3]
5221 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5223 gimple
*new_stmt
= NULL
;
5224 stmt_vec_info prev_stmt_info
;
5230 vec
<tree
> vec_oprnds0
= vNULL
;
5231 vec
<tree
> vec_oprnds1
= vNULL
;
5232 vec
<tree
> vec_oprnds2
= vNULL
;
5233 tree vop0
, vop1
, vop2
;
5234 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5235 vec_info
*vinfo
= stmt_info
->vinfo
;
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5241 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5245 /* Is STMT a vectorizable binary/unary operation? */
5246 if (!is_gimple_assign (stmt
))
5249 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5252 code
= gimple_assign_rhs_code (stmt
);
5254 /* For pointer addition, we should use the normal plus for
5255 the vector addition. */
5256 if (code
== POINTER_PLUS_EXPR
)
5259 /* Support only unary or binary operations. */
5260 op_type
= TREE_CODE_LENGTH (code
);
5261 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5263 if (dump_enabled_p ())
5264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5265 "num. args = %d (not unary/binary/ternary op).\n",
5270 scalar_dest
= gimple_assign_lhs (stmt
);
5271 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5273 /* Most operations cannot handle bit-precision types without extra
5275 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5276 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5277 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
5278 /* Exception are bitwise binary operations. */
5279 && code
!= BIT_IOR_EXPR
5280 && code
!= BIT_XOR_EXPR
5281 && code
!= BIT_AND_EXPR
)
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5285 "bit-precision arithmetic not supported.\n");
5289 op0
= gimple_assign_rhs1 (stmt
);
5290 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5292 if (dump_enabled_p ())
5293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5294 "use not simple.\n");
5297 /* If op0 is an external or constant def use a vector type with
5298 the same size as the output vector type. */
5301 /* For boolean type we cannot determine vectype by
5302 invariant value (don't know whether it is a vector
5303 of booleans or vector of integers). We use output
5304 vectype because operations on boolean don't change
5306 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5308 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5310 if (dump_enabled_p ())
5311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5312 "not supported operation on bool value.\n");
5315 vectype
= vectype_out
;
5318 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5321 gcc_assert (vectype
);
5324 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5327 "no vectype for scalar type ");
5328 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5330 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5336 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5337 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5338 if (nunits_out
!= nunits_in
)
5341 if (op_type
== binary_op
|| op_type
== ternary_op
)
5343 op1
= gimple_assign_rhs2 (stmt
);
5344 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5346 if (dump_enabled_p ())
5347 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5348 "use not simple.\n");
5352 if (op_type
== ternary_op
)
5354 op2
= gimple_assign_rhs3 (stmt
);
5355 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5357 if (dump_enabled_p ())
5358 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5359 "use not simple.\n");
5365 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5369 /* Multiple types in SLP are handled by creating the appropriate number of
5370 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5375 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
5377 gcc_assert (ncopies
>= 1);
5379 /* Shifts are handled in vectorizable_shift (). */
5380 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5381 || code
== RROTATE_EXPR
)
5384 /* Supportable by target? */
5386 vec_mode
= TYPE_MODE (vectype
);
5387 if (code
== MULT_HIGHPART_EXPR
)
5388 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5391 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5399 target_support_p
= (optab_handler (optab
, vec_mode
)
5400 != CODE_FOR_nothing
);
5403 if (!target_support_p
)
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5407 "op not supported by target.\n");
5408 /* Check only during analysis. */
5409 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5410 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_NOTE
, vect_location
,
5414 "proceeding using word mode.\n");
5417 /* Worthwhile without SIMD support? Check only during analysis. */
5418 if (!VECTOR_MODE_P (vec_mode
)
5420 && vf
< vect_min_worthwhile_factor (code
))
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5424 "not worthwhile without SIMD support.\n");
5428 if (!vec_stmt
) /* transformation not required. */
5430 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5431 if (dump_enabled_p ())
5432 dump_printf_loc (MSG_NOTE
, vect_location
,
5433 "=== vectorizable_operation ===\n");
5434 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_NOTE
, vect_location
,
5442 "transform binary/unary operation.\n");
5445 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5447 /* In case the vectorization factor (VF) is bigger than the number
5448 of elements that we can fit in a vectype (nunits), we have to generate
5449 more than one vector stmt - i.e - we need to "unroll" the
5450 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5451 from one copy of the vector stmt to the next, in the field
5452 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5453 stages to find the correct vector defs to be used when vectorizing
5454 stmts that use the defs of the current stmt. The example below
5455 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5456 we need to create 4 vectorized stmts):
5458 before vectorization:
5459 RELATED_STMT VEC_STMT
5463 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5465 RELATED_STMT VEC_STMT
5466 VS1_0: vx0 = memref0 VS1_1 -
5467 VS1_1: vx1 = memref1 VS1_2 -
5468 VS1_2: vx2 = memref2 VS1_3 -
5469 VS1_3: vx3 = memref3 - -
5470 S1: x = load - VS1_0
5473 step2: vectorize stmt S2 (done here):
5474 To vectorize stmt S2 we first need to find the relevant vector
5475 def for the first operand 'x'. This is, as usual, obtained from
5476 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5477 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5478 relevant vector def 'vx0'. Having found 'vx0' we can generate
5479 the vector stmt VS2_0, and as usual, record it in the
5480 STMT_VINFO_VEC_STMT of stmt S2.
5481 When creating the second copy (VS2_1), we obtain the relevant vector
5482 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5483 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5484 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5485 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5486 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5487 chain of stmts and pointers:
5488 RELATED_STMT VEC_STMT
5489 VS1_0: vx0 = memref0 VS1_1 -
5490 VS1_1: vx1 = memref1 VS1_2 -
5491 VS1_2: vx2 = memref2 VS1_3 -
5492 VS1_3: vx3 = memref3 - -
5493 S1: x = load - VS1_0
5494 VS2_0: vz0 = vx0 + v1 VS2_1 -
5495 VS2_1: vz1 = vx1 + v1 VS2_2 -
5496 VS2_2: vz2 = vx2 + v1 VS2_3 -
5497 VS2_3: vz3 = vx3 + v1 - -
5498 S2: z = x + 1 - VS2_0 */
5500 prev_stmt_info
= NULL
;
5501 for (j
= 0; j
< ncopies
; j
++)
5506 if (op_type
== binary_op
|| op_type
== ternary_op
)
5507 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5510 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5512 if (op_type
== ternary_op
)
5513 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5518 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5519 if (op_type
== ternary_op
)
5521 tree vec_oprnd
= vec_oprnds2
.pop ();
5522 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5527 /* Arguments are ready. Create the new vector stmt. */
5528 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5530 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5531 ? vec_oprnds1
[i
] : NULL_TREE
);
5532 vop2
= ((op_type
== ternary_op
)
5533 ? vec_oprnds2
[i
] : NULL_TREE
);
5534 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5535 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5536 gimple_assign_set_lhs (new_stmt
, new_temp
);
5537 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5539 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5546 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5548 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5549 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5552 vec_oprnds0
.release ();
5553 vec_oprnds1
.release ();
5554 vec_oprnds2
.release ();
5559 /* A helper function to ensure data reference DR's base alignment
5563 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5568 if (DR_VECT_AUX (dr
)->base_misaligned
)
5570 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5571 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5573 if (decl_in_symtab_p (base_decl
))
5574 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5577 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5578 DECL_USER_ALIGN (base_decl
) = 1;
5580 DR_VECT_AUX (dr
)->base_misaligned
= false;
5585 /* Function get_group_alias_ptr_type.
5587 Return the alias type for the group starting at FIRST_STMT. */
5590 get_group_alias_ptr_type (gimple
*first_stmt
)
5592 struct data_reference
*first_dr
, *next_dr
;
5595 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5596 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5599 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5600 if (get_alias_set (DR_REF (first_dr
))
5601 != get_alias_set (DR_REF (next_dr
)))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_NOTE
, vect_location
,
5605 "conflicting alias set types.\n");
5606 return ptr_type_node
;
5608 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5610 return reference_alias_ptr_type (DR_REF (first_dr
));
5614 /* Function vectorizable_store.
5616 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5618 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5619 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5620 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5623 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5629 tree vec_oprnd
= NULL_TREE
;
5630 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5631 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5633 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5634 struct loop
*loop
= NULL
;
5635 machine_mode vec_mode
;
5637 enum dr_alignment_support alignment_support_scheme
;
5639 enum vect_def_type dt
;
5640 stmt_vec_info prev_stmt_info
= NULL
;
5641 tree dataref_ptr
= NULL_TREE
;
5642 tree dataref_offset
= NULL_TREE
;
5643 gimple
*ptr_incr
= NULL
;
5646 gimple
*next_stmt
, *first_stmt
;
5648 unsigned int group_size
, i
;
5649 vec
<tree
> oprnds
= vNULL
;
5650 vec
<tree
> result_chain
= vNULL
;
5652 tree offset
= NULL_TREE
;
5653 vec
<tree
> vec_oprnds
= vNULL
;
5654 bool slp
= (slp_node
!= NULL
);
5655 unsigned int vec_num
;
5656 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5657 vec_info
*vinfo
= stmt_info
->vinfo
;
5659 gather_scatter_info gs_info
;
5660 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5663 vec_load_store_type vls_type
;
5666 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5669 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5673 /* Is vectorizable store? */
5675 if (!is_gimple_assign (stmt
))
5678 scalar_dest
= gimple_assign_lhs (stmt
);
5679 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5680 && is_pattern_stmt_p (stmt_info
))
5681 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5682 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5683 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5684 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5685 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5686 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5687 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5688 && TREE_CODE (scalar_dest
) != MEM_REF
)
5691 /* Cannot have hybrid store SLP -- that would mean storing to the
5692 same location twice. */
5693 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5695 gcc_assert (gimple_assign_single_p (stmt
));
5697 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5698 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5702 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5703 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5708 /* Multiple types in SLP are handled by creating the appropriate number of
5709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5714 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5716 gcc_assert (ncopies
>= 1);
5718 /* FORNOW. This restriction should be relaxed. */
5719 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5721 if (dump_enabled_p ())
5722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5723 "multiple types in nested loop.\n");
5727 op
= gimple_assign_rhs1 (stmt
);
5729 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5733 "use not simple.\n");
5737 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5738 vls_type
= VLS_STORE_INVARIANT
;
5740 vls_type
= VLS_STORE
;
5742 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5745 elem_type
= TREE_TYPE (vectype
);
5746 vec_mode
= TYPE_MODE (vectype
);
5748 /* FORNOW. In some cases can vectorize even if data-type not supported
5749 (e.g. - array initialization with 0). */
5750 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5753 if (!STMT_VINFO_DATA_REF (stmt_info
))
5756 vect_memory_access_type memory_access_type
;
5757 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5758 &memory_access_type
, &gs_info
))
5761 if (!vec_stmt
) /* transformation not required. */
5763 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5764 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5765 /* The SLP costs are calculated during SLP analysis. */
5766 if (!PURE_SLP_STMT (stmt_info
))
5767 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5771 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5775 ensure_base_align (stmt_info
, dr
);
5777 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5779 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5780 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5781 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5782 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5783 edge pe
= loop_preheader_edge (loop
);
5786 enum { NARROW
, NONE
, WIDEN
} modifier
;
5787 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5789 if (nunits
== (unsigned int) scatter_off_nunits
)
5791 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5793 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5796 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5797 sel
[i
] = i
| nunits
;
5799 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5800 gcc_assert (perm_mask
!= NULL_TREE
);
5802 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5804 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5807 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5808 sel
[i
] = i
| scatter_off_nunits
;
5810 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5811 gcc_assert (perm_mask
!= NULL_TREE
);
5817 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5818 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5819 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5820 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5821 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5822 scaletype
= TREE_VALUE (arglist
);
5824 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5825 && TREE_CODE (rettype
) == VOID_TYPE
);
5827 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5828 if (!is_gimple_min_invariant (ptr
))
5830 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5831 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5832 gcc_assert (!new_bb
);
5835 /* Currently we support only unconditional scatter stores,
5836 so mask should be all ones. */
5837 mask
= build_int_cst (masktype
, -1);
5838 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5840 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5842 prev_stmt_info
= NULL
;
5843 for (j
= 0; j
< ncopies
; ++j
)
5848 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5850 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5852 else if (modifier
!= NONE
&& (j
& 1))
5854 if (modifier
== WIDEN
)
5857 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5858 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5861 else if (modifier
== NARROW
)
5863 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5866 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5875 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5877 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5881 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5883 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5884 == TYPE_VECTOR_SUBPARTS (srctype
));
5885 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5886 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5887 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5888 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5892 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5894 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5895 == TYPE_VECTOR_SUBPARTS (idxtype
));
5896 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5897 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5898 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5899 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5904 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5906 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5908 if (prev_stmt_info
== NULL
)
5909 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5911 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5912 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5917 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5920 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5921 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5922 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5924 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5927 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5929 /* We vectorize all the stmts of the interleaving group when we
5930 reach the last stmt in the group. */
5931 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5932 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5941 grouped_store
= false;
5942 /* VEC_NUM is the number of vect stmts to be created for this
5944 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5945 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5946 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5947 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5948 op
= gimple_assign_rhs1 (first_stmt
);
5951 /* VEC_NUM is the number of vect stmts to be created for this
5953 vec_num
= group_size
;
5955 ref_type
= get_group_alias_ptr_type (first_stmt
);
5961 group_size
= vec_num
= 1;
5962 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5965 if (dump_enabled_p ())
5966 dump_printf_loc (MSG_NOTE
, vect_location
,
5967 "transform store. ncopies = %d\n", ncopies
);
5969 if (memory_access_type
== VMAT_ELEMENTWISE
5970 || memory_access_type
== VMAT_STRIDED_SLP
)
5972 gimple_stmt_iterator incr_gsi
;
5978 gimple_seq stmts
= NULL
;
5979 tree stride_base
, stride_step
, alias_off
;
5983 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5986 = fold_build_pointer_plus
5987 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5988 size_binop (PLUS_EXPR
,
5989 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5990 convert_to_ptrofftype (DR_INIT (first_dr
))));
5991 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5993 /* For a store with loop-invariant (but other than power-of-2)
5994 stride (i.e. not a grouped access) like so:
5996 for (i = 0; i < n; i += stride)
5999 we generate a new induction variable and new stores from
6000 the components of the (vectorized) rhs:
6002 for (j = 0; ; j += VF*stride)
6007 array[j + stride] = tmp2;
6011 unsigned nstores
= nunits
;
6013 tree ltype
= elem_type
;
6016 if (group_size
< nunits
6017 && nunits
% group_size
== 0)
6019 nstores
= nunits
/ group_size
;
6021 ltype
= build_vector_type (elem_type
, group_size
);
6023 else if (group_size
>= nunits
6024 && group_size
% nunits
== 0)
6030 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6031 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6034 ivstep
= stride_step
;
6035 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6036 build_int_cst (TREE_TYPE (ivstep
), vf
));
6038 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6040 create_iv (stride_base
, ivstep
, NULL
,
6041 loop
, &incr_gsi
, insert_after
,
6043 incr
= gsi_stmt (incr_gsi
);
6044 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6046 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6048 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6050 prev_stmt_info
= NULL
;
6051 alias_off
= build_int_cst (ref_type
, 0);
6052 next_stmt
= first_stmt
;
6053 for (g
= 0; g
< group_size
; g
++)
6055 running_off
= offvar
;
6058 tree size
= TYPE_SIZE_UNIT (ltype
);
6059 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6061 tree newoff
= copy_ssa_name (running_off
, NULL
);
6062 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6064 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6065 running_off
= newoff
;
6067 unsigned int group_el
= 0;
6068 unsigned HOST_WIDE_INT
6069 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6070 for (j
= 0; j
< ncopies
; j
++)
6072 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6073 and first_stmt == stmt. */
6078 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6080 vec_oprnd
= vec_oprnds
[0];
6084 gcc_assert (gimple_assign_single_p (next_stmt
));
6085 op
= gimple_assign_rhs1 (next_stmt
);
6086 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6092 vec_oprnd
= vec_oprnds
[j
];
6095 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6096 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6100 for (i
= 0; i
< nstores
; i
++)
6102 tree newref
, newoff
;
6103 gimple
*incr
, *assign
;
6104 tree size
= TYPE_SIZE (ltype
);
6105 /* Extract the i'th component. */
6106 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6107 bitsize_int (i
), size
);
6108 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6111 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6115 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6117 newref
= build2 (MEM_REF
, ltype
,
6118 running_off
, this_off
);
6120 /* And store it to *running_off. */
6121 assign
= gimple_build_assign (newref
, elem
);
6122 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6126 || group_el
== group_size
)
6128 newoff
= copy_ssa_name (running_off
, NULL
);
6129 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6130 running_off
, stride_step
);
6131 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6133 running_off
= newoff
;
6136 if (g
== group_size
- 1
6139 if (j
== 0 && i
== 0)
6140 STMT_VINFO_VEC_STMT (stmt_info
)
6141 = *vec_stmt
= assign
;
6143 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6144 prev_stmt_info
= vinfo_for_stmt (assign
);
6148 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6153 vec_oprnds
.release ();
6157 auto_vec
<tree
> dr_chain (group_size
);
6158 oprnds
.create (group_size
);
6160 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6161 gcc_assert (alignment_support_scheme
);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
6164 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6165 || alignment_support_scheme
== dr_aligned
6166 || alignment_support_scheme
== dr_unaligned_supported
);
6168 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6169 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6170 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6172 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6173 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6175 aggr_type
= vectype
;
6177 /* In case the vectorization factor (VF) is bigger than the number
6178 of elements that we can fit in a vectype (nunits), we have to generate
6179 more than one vector stmt - i.e - we need to "unroll" the
6180 vector stmt by a factor VF/nunits. For more details see documentation in
6181 vect_get_vec_def_for_copy_stmt. */
  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
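  /* A concrete (illustrative) instance of the scheme above: for a
     group_size == 2 chain such as  a[i].x = ...; a[i].y = ...;  with V4SI
     vectors, vect_permute_store_chain interleaves vx = {x0,x1,x2,x3} and
     vy = {y0,y1,y2,y3} into {x0,y0,x1,y1} and {x2,y2,x3,y3}, which are then
     stored to consecutive vector-sized locations.  */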
6216 prev_stmt_info
= NULL
;
6217 for (j
= 0; j
< ncopies
; j
++)
6224 /* Get vectorized arguments for SLP_NODE. */
6225 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6226 NULL
, slp_node
, -1);
6228 vec_oprnd
= vec_oprnds
[0];
6232 /* For interleaved stores we collect vectorized defs for all the
6233 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6234 used as an input to vect_permute_store_chain(), and OPRNDS as
6235 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6237 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6238 OPRNDS are of size 1. */
6239 next_stmt
= first_stmt
;
6240 for (i
= 0; i
< group_size
; i
++)
6242 /* Since gaps are not supported for interleaved stores,
6243 GROUP_SIZE is the exact number of stmts in the chain.
6244 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6245 there is no interleaving, GROUP_SIZE is 1, and only one
6246 iteration of the loop will be executed. */
6247 gcc_assert (next_stmt
6248 && gimple_assign_single_p (next_stmt
));
6249 op
= gimple_assign_rhs1 (next_stmt
);
6251 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6252 dr_chain
.quick_push (vec_oprnd
);
6253 oprnds
.quick_push (vec_oprnd
);
6254 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
	  /* We should have caught mismatched types earlier.  */
6259 gcc_assert (useless_type_conversion_p (vectype
,
6260 TREE_TYPE (vec_oprnd
)));
6261 bool simd_lane_access_p
6262 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6263 if (simd_lane_access_p
6264 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6265 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6266 && integer_zerop (DR_OFFSET (first_dr
))
6267 && integer_zerop (DR_INIT (first_dr
))
6268 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6269 get_alias_set (TREE_TYPE (ref_type
))))
6271 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6272 dataref_offset
= build_int_cst (ref_type
, 0);
6277 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6278 simd_lane_access_p
? loop
: NULL
,
6279 offset
, &dummy
, gsi
, &ptr_incr
,
6280 simd_lane_access_p
, &inv_p
);
6281 gcc_assert (bb_vinfo
|| !inv_p
);
6285 /* For interleaved stores we created vectorized defs for all the
6286 defs stored in OPRNDS in the previous iteration (previous copy).
6287 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6288 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6290 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6291 OPRNDS are of size 1. */
6292 for (i
= 0; i
< group_size
; i
++)
6295 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6296 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6297 dr_chain
[i
] = vec_oprnd
;
6298 oprnds
[i
] = vec_oprnd
;
6302 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6303 TYPE_SIZE_UNIT (aggr_type
));
6305 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6306 TYPE_SIZE_UNIT (aggr_type
));
6309 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6313 /* Combine all the vectors into an array. */
6314 vec_array
= create_vector_array (vectype
, vec_num
);
6315 for (i
= 0; i
< vec_num
; i
++)
6317 vec_oprnd
= dr_chain
[i
];
6318 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6322 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6323 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6324 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
6325 gimple_call_set_lhs (new_stmt
, data_ref
);
6326 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6334 result_chain
.create (group_size
);
6336 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6340 next_stmt
= first_stmt
;
6341 for (i
= 0; i
< vec_num
; i
++)
6343 unsigned align
, misalign
;
6346 /* Bump the vector pointer. */
6347 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6351 vec_oprnd
= vec_oprnds
[i
];
6352 else if (grouped_store
)
6353 /* For grouped stores vectorized defs are interleaved in
6354 vect_permute_store_chain(). */
6355 vec_oprnd
= result_chain
[i
];
6357 data_ref
= fold_build2 (MEM_REF
, vectype
,
6361 : build_int_cst (ref_type
, 0));
6362 align
= TYPE_ALIGN_UNIT (vectype
);
6363 if (aligned_access_p (first_dr
))
6365 else if (DR_MISALIGNMENT (first_dr
) == -1)
6367 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6368 align
= TYPE_ALIGN_UNIT (elem_type
);
6370 align
= get_object_alignment (DR_REF (first_dr
))
6373 TREE_TYPE (data_ref
)
6374 = build_aligned_type (TREE_TYPE (data_ref
),
6375 align
* BITS_PER_UNIT
);
6379 TREE_TYPE (data_ref
)
6380 = build_aligned_type (TREE_TYPE (data_ref
),
6381 TYPE_ALIGN (elem_type
));
6382 misalign
= DR_MISALIGNMENT (first_dr
);
6384 if (dataref_offset
== NULL_TREE
6385 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6386 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6389 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6391 tree perm_mask
= perm_mask_for_reverse (vectype
);
6393 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6395 tree new_temp
= make_ssa_name (perm_dest
);
6397 /* Generate the permute statement. */
6399 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6400 vec_oprnd
, perm_mask
);
6401 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6403 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6404 vec_oprnd
= new_temp
;
6407 /* Arguments are ready. Create the new vector stmt. */
6408 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6409 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6414 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6422 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6424 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6425 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6430 result_chain
.release ();
6431 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
{
  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
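/* Example use of the two helpers above (a sketch; perm_mask_for_reverse
   elsewhere in this file builds its mask in the same way): to reverse a
   V4SI vector one would pass sel = {3, 2, 1, 0}, e.g.

     unsigned char sel[4] = { 3, 2, 1, 0 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   and use MASK as the third operand of a VEC_PERM_EXPR.  */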
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple *stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
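/* Illustrative example (not taken from the sources): for a load such as

     for (i = 0; i < n; i++)
       ... = *p;              // p and *p are invariant in the loop

   where the definition of p happens to sit inside the loop body,
   hoist_defs_of_uses moves that definition to the preheader so that
   vectorizable_load below can emit the scalar load on the preheader edge
   and broadcast the result into a vector.  */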
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6558 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6559 slp_tree slp_node
, slp_instance slp_node_instance
)
6562 tree vec_dest
= NULL
;
6563 tree data_ref
= NULL
;
6564 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6565 stmt_vec_info prev_stmt_info
;
6566 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6567 struct loop
*loop
= NULL
;
6568 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6569 bool nested_in_vect_loop
= false;
6570 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6574 gimple
*new_stmt
= NULL
;
6576 enum dr_alignment_support alignment_support_scheme
;
6577 tree dataref_ptr
= NULL_TREE
;
6578 tree dataref_offset
= NULL_TREE
;
6579 gimple
*ptr_incr
= NULL
;
6581 int i
, j
, group_size
, group_gap_adj
;
6582 tree msq
= NULL_TREE
, lsq
;
6583 tree offset
= NULL_TREE
;
6584 tree byte_offset
= NULL_TREE
;
6585 tree realignment_token
= NULL_TREE
;
6587 vec
<tree
> dr_chain
= vNULL
;
6588 bool grouped_load
= false;
6590 gimple
*first_stmt_for_drptr
= NULL
;
6592 bool compute_in_loop
= false;
6593 struct loop
*at_loop
;
6595 bool slp
= (slp_node
!= NULL
);
6596 bool slp_perm
= false;
6597 enum tree_code code
;
6598 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6601 gather_scatter_info gs_info
;
6602 vec_info
*vinfo
= stmt_info
->vinfo
;
6605 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6608 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6612 /* Is vectorizable load? */
6613 if (!is_gimple_assign (stmt
))
6616 scalar_dest
= gimple_assign_lhs (stmt
);
6617 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6620 code
= gimple_assign_rhs_code (stmt
);
6621 if (code
!= ARRAY_REF
6622 && code
!= BIT_FIELD_REF
6623 && code
!= INDIRECT_REF
6624 && code
!= COMPONENT_REF
6625 && code
!= IMAGPART_EXPR
6626 && code
!= REALPART_EXPR
6628 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6631 if (!STMT_VINFO_DATA_REF (stmt_info
))
6634 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6635 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6639 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6640 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6641 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6646 /* Multiple types in SLP are handled by creating the appropriate number of
6647 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6652 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6654 gcc_assert (ncopies
>= 1);
6656 /* FORNOW. This restriction should be relaxed. */
6657 if (nested_in_vect_loop
&& ncopies
> 1)
6659 if (dump_enabled_p ())
6660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6661 "multiple types in nested loop.\n");
6665 /* Invalidate assumptions made by dependence analysis when vectorization
6666 on the unrolled body effectively re-orders stmts. */
6668 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6669 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6670 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6672 if (dump_enabled_p ())
6673 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6674 "cannot perform implicit CSE when unrolling "
6675 "with negative dependence distance\n");
6679 elem_type
= TREE_TYPE (vectype
);
6680 mode
= TYPE_MODE (vectype
);
6682 /* FORNOW. In some cases can vectorize even if data-type not supported
6683 (e.g. - data copies). */
6684 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6686 if (dump_enabled_p ())
6687 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6688 "Aligned load, but unsupported type.\n");
6692 /* Check if the load is a part of an interleaving chain. */
6693 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6695 grouped_load
= true;
6697 gcc_assert (!nested_in_vect_loop
);
6698 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6700 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6701 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6703 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6706 /* Invalidate assumptions made by dependence analysis when vectorization
6707 on the unrolled body effectively re-orders stmts. */
6708 if (!PURE_SLP_STMT (stmt_info
)
6709 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6710 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6711 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6713 if (dump_enabled_p ())
6714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6715 "cannot perform implicit CSE when performing "
6716 "group loads with negative dependence distance\n");
6720 /* Similarly when the stmt is a load that is both part of a SLP
6721 instance and a loop vectorized stmt via the same-dr mechanism
6722 we have to give up. */
6723 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6724 && (STMT_SLP_TYPE (stmt_info
)
6725 != STMT_SLP_TYPE (vinfo_for_stmt
6726 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6728 if (dump_enabled_p ())
6729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6730 "conflicting SLP types for CSEd load\n");
6735 vect_memory_access_type memory_access_type
;
6736 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6737 &memory_access_type
, &gs_info
))
6740 if (!vec_stmt
) /* transformation not required. */
6743 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6744 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6745 /* The SLP costs are calculated during SLP analysis. */
6746 if (!PURE_SLP_STMT (stmt_info
))
6747 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6753 gcc_assert (memory_access_type
6754 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6756 if (dump_enabled_p ())
6757 dump_printf_loc (MSG_NOTE
, vect_location
,
6758 "transform load. ncopies = %d\n", ncopies
);
6762 ensure_base_align (stmt_info
, dr
);
6764 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6766 tree vec_oprnd0
= NULL_TREE
, op
;
6767 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6768 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6769 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6770 edge pe
= loop_preheader_edge (loop
);
6773 enum { NARROW
, NONE
, WIDEN
} modifier
;
6774 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6776 if (nunits
== gather_off_nunits
)
6778 else if (nunits
== gather_off_nunits
/ 2)
6780 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6783 for (i
= 0; i
< gather_off_nunits
; ++i
)
6784 sel
[i
] = i
| nunits
;
6786 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6788 else if (nunits
== gather_off_nunits
* 2)
6790 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6793 for (i
= 0; i
< nunits
; ++i
)
6794 sel
[i
] = i
< gather_off_nunits
6795 ? i
: i
+ nunits
- gather_off_nunits
;
6797 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6803 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6804 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6805 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6806 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6807 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6808 scaletype
= TREE_VALUE (arglist
);
6809 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6811 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6813 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6814 if (!is_gimple_min_invariant (ptr
))
6816 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6817 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6818 gcc_assert (!new_bb
);
6821 /* Currently we support only unconditional gather loads,
6822 so mask should be all ones. */
6823 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6824 mask
= build_int_cst (masktype
, -1);
6825 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6827 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6828 mask
= build_vector_from_val (masktype
, mask
);
6829 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6831 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6835 for (j
= 0; j
< 6; ++j
)
6837 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6838 mask
= build_real (TREE_TYPE (masktype
), r
);
6839 mask
= build_vector_from_val (masktype
, mask
);
6840 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6845 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6847 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6848 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6849 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6853 for (j
= 0; j
< 6; ++j
)
6855 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6856 merge
= build_real (TREE_TYPE (rettype
), r
);
6860 merge
= build_vector_from_val (rettype
, merge
);
6861 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6863 prev_stmt_info
= NULL
;
6864 for (j
= 0; j
< ncopies
; ++j
)
6866 if (modifier
== WIDEN
&& (j
& 1))
6867 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6868 perm_mask
, stmt
, gsi
);
6871 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6874 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6876 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6878 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6879 == TYPE_VECTOR_SUBPARTS (idxtype
));
6880 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6881 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6883 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6884 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6889 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6891 if (!useless_type_conversion_p (vectype
, rettype
))
6893 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6894 == TYPE_VECTOR_SUBPARTS (rettype
));
6895 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6896 gimple_call_set_lhs (new_stmt
, op
);
6897 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6898 var
= make_ssa_name (vec_dest
);
6899 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6901 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6905 var
= make_ssa_name (vec_dest
, new_stmt
);
6906 gimple_call_set_lhs (new_stmt
, var
);
6909 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6911 if (modifier
== NARROW
)
6918 var
= permute_vec_elements (prev_res
, var
,
6919 perm_mask
, stmt
, gsi
);
6920 new_stmt
= SSA_NAME_DEF_STMT (var
);
6923 if (prev_stmt_info
== NULL
)
6924 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6926 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6927 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6932 if (memory_access_type
== VMAT_ELEMENTWISE
6933 || memory_access_type
== VMAT_STRIDED_SLP
)
6935 gimple_stmt_iterator incr_gsi
;
6941 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6942 gimple_seq stmts
= NULL
;
6943 tree stride_base
, stride_step
, alias_off
;
6945 gcc_assert (!nested_in_vect_loop
);
6947 if (slp
&& grouped_load
)
6949 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6950 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6951 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6952 ref_type
= get_group_alias_ptr_type (first_stmt
);
6959 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
			 convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */
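      /* An illustrative sketch (assuming int elements, VF == 4 and
	 ncopies == 1; not literally the emitted gimple):

	   for (j = 0; ; j += 4 * stride)
	     {
	       tmp0 = array[j];
	       tmp1 = array[j + stride];
	       tmp2 = array[j + 2 * stride];
	       tmp3 = array[j + 3 * stride];
	       vectemp = {tmp0, tmp1, tmp2, tmp3};
	     }

	 The element loads below are emitted as MEM_REFs off RUNNING_OFF and
	 the CONSTRUCTOR is materialized via vect_init_vector.  */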
6986 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6987 build_int_cst (TREE_TYPE (stride_step
), vf
));
6989 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6991 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6992 loop
, &incr_gsi
, insert_after
,
6994 incr
= gsi_stmt (incr_gsi
);
6995 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6997 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6998 &stmts
, true, NULL_TREE
);
7000 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7002 prev_stmt_info
= NULL
;
7003 running_off
= offvar
;
7004 alias_off
= build_int_cst (ref_type
, 0);
7005 int nloads
= nunits
;
7007 tree ltype
= TREE_TYPE (vectype
);
7008 tree lvectype
= vectype
;
7009 auto_vec
<tree
> dr_chain
;
7010 if (memory_access_type
== VMAT_STRIDED_SLP
)
7012 if (group_size
< nunits
)
7014 /* Avoid emitting a constructor of vector elements by performing
7015 the loads using an integer type of the same size,
7016 constructing a vector of those and then re-interpreting it
7017 as the original vector type. This works around the fact
7018 that the vec_init optab was only designed for scalar
7019 element modes and thus expansion goes through memory.
7020 This avoids a huge runtime penalty due to the general
7021 inability to perform store forwarding from smaller stores
7022 to a larger load. */
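	      /* For instance (illustrative, assuming 32-bit int elements
		 and an available 64-bit integer mode): with group_size == 2
		 and a V4SI vectype we emit two DImode loads instead of four
		 SImode loads plus a V4SI CONSTRUCTOR, build a V2DI vector
		 from them, and VIEW_CONVERT_EXPR the result back to V4SI.  */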
7024 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7025 enum machine_mode elmode
= mode_for_size (lsize
, MODE_INT
, 0);
7026 enum machine_mode vmode
= mode_for_vector (elmode
,
7027 nunits
/ group_size
);
7028 /* If we can't construct such a vector fall back to
7029 element loads of the original vector type. */
7030 if (VECTOR_MODE_P (vmode
)
7031 && optab_handler (vec_init_optab
, vmode
) != CODE_FOR_nothing
)
7033 nloads
= nunits
/ group_size
;
7035 ltype
= build_nonstandard_integer_type (lsize
, 1);
7036 lvectype
= build_vector_type (ltype
, nloads
);
7045 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7049 /* For SLP permutation support we need to load the whole group,
7050 not only the number of vector stmts the permutation result
7054 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7055 dr_chain
.create (ncopies
);
7058 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7061 unsigned HOST_WIDE_INT
7062 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7063 for (j
= 0; j
< ncopies
; j
++)
7066 vec_alloc (v
, nloads
);
7067 for (i
= 0; i
< nloads
; i
++)
7069 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7071 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7072 build2 (MEM_REF
, ltype
,
7073 running_off
, this_off
));
7074 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7076 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7077 gimple_assign_lhs (new_stmt
));
7081 || group_el
== group_size
)
7083 tree newoff
= copy_ssa_name (running_off
);
7084 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7085 running_off
, stride_step
);
7086 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7088 running_off
= newoff
;
7094 tree vec_inv
= build_constructor (lvectype
, v
);
7095 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7096 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7097 if (lvectype
!= vectype
)
7099 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7101 build1 (VIEW_CONVERT_EXPR
,
7102 vectype
, new_temp
));
7103 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7110 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7112 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7117 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7119 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7120 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7126 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7127 slp_node_instance
, false, &n_perms
);
7134 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7135 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7136 /* For SLP vectorization we directly vectorize a subchain
7137 without permutation. */
7138 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7139 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7140 /* For BB vectorization always use the first stmt to base
7141 the data ref pointer on. */
7143 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7145 /* Check if the chain of loads is already vectorized. */
7146 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7147 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7148 ??? But we can only do so if there is exactly one
7149 as we have no way to get at the rest. Leave the CSE
7151 ??? With the group load eventually participating
7152 in multiple different permutations (having multiple
7153 slp nodes which refer to the same group) the CSE
7154 is even wrong code. See PR56270. */
7157 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7160 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7163 /* VEC_NUM is the number of vect stmts to be created for this group. */
7166 grouped_load
= false;
7167 /* For SLP permutation support we need to load the whole group,
7168 not only the number of vector stmts the permutation result
7171 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7173 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7174 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7177 vec_num
= group_size
;
7179 ref_type
= get_group_alias_ptr_type (first_stmt
);
7185 group_size
= vec_num
= 1;
7187 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7190 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7191 gcc_assert (alignment_support_scheme
);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
7194 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7195 || alignment_support_scheme
== dr_aligned
7196 || alignment_support_scheme
== dr_unaligned_supported
);
7198 /* In case the vectorization factor (VF) is bigger than the number
7199 of elements that we can fit in a vectype (nunits), we have to generate
7200 more than one vector stmt - i.e - we need to "unroll" the
7201 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7202 from one copy of the vector stmt to the next, in the field
7203 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7204 stages to find the correct vector defs to be used when vectorizing
7205 stmts that use the defs of the current stmt. The example below
7206 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7207 need to create 4 vectorized stmts):
7209 before vectorization:
7210 RELATED_STMT VEC_STMT
7214 step 1: vectorize stmt S1:
7215 We first create the vector stmt VS1_0, and, as usual, record a
7216 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7217 Next, we create the vector stmt VS1_1, and record a pointer to
7218 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7219 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7221 RELATED_STMT VEC_STMT
7222 VS1_0: vx0 = memref0 VS1_1 -
7223 VS1_1: vx1 = memref1 VS1_2 -
7224 VS1_2: vx2 = memref2 VS1_3 -
7225 VS1_3: vx3 = memref3 - -
7226 S1: x = load - VS1_0
7229 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7230 information we recorded in RELATED_STMT field is used to vectorize
7233 /* In case of interleaving (non-unit grouped access):
7240 Vectorized loads are created in the order of memory accesses
7241 starting from the access of the first stmt of the chain:
7244 VS2: vx1 = &base + vec_size*1
7245 VS3: vx3 = &base + vec_size*2
7246 VS4: vx4 = &base + vec_size*3
7248 Then permutation statements are generated:
7250 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7251 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7254 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7255 (the order of the data-refs in the output of vect_permute_load_chain
7256 corresponds to the order of scalar stmts in the interleaving chain - see
7257 the documentation of vect_permute_load_chain()).
7258 The generation of permutation stmts and recording them in
7259 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7261 In case of both multiple types and interleaving, the vector loads and
7262 permutation stmts above are created for every copy. The result vector
7263 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7264 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
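  /* A concrete (illustrative) instance of the scheme above: for a
     group_size == 2 chain such as  x = a[i].x; y = a[i].y;  with V4SI
     vectors, the two contiguous vector loads produce {x0,y0,x1,y1} and
     {x2,y2,x3,y3}; vect_permute_load_chain then extracts the even and odd
     elements to recover vx = {x0,x1,x2,x3} and vy = {y0,y1,y2,y3}.  */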
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   p = p + indx * vectype_size;
	   vec_dest = *(p);
	   indx = indx + 1;
	 }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
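  /* Roughly (an informal reading of the dr_explicit_realign scheme above,
     not a precise specification of REALIGN_LOAD_EXPR): MSQ and LSQ are the
     two aligned vectors surrounding the unaligned address p, and
     realign_load selects the bytes of the unaligned vector out of their
     concatenation, steered by REALIGNMENT_TOKEN (typically derived from the
     low bits of the address).  */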
7301 if (nested_in_vect_loop
7302 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7303 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7305 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7306 compute_in_loop
= true;
7309 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7310 || alignment_support_scheme
== dr_explicit_realign
)
7311 && !compute_in_loop
)
7313 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7314 alignment_support_scheme
, NULL_TREE
,
7316 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7318 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7319 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7326 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7327 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7329 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7330 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7332 aggr_type
= vectype
;
7334 prev_stmt_info
= NULL
;
7335 for (j
= 0; j
< ncopies
; j
++)
7337 /* 1. Create the vector or array pointer update chain. */
7340 bool simd_lane_access_p
7341 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7342 if (simd_lane_access_p
7343 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7344 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7345 && integer_zerop (DR_OFFSET (first_dr
))
7346 && integer_zerop (DR_INIT (first_dr
))
7347 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7348 get_alias_set (TREE_TYPE (ref_type
)))
7349 && (alignment_support_scheme
== dr_aligned
7350 || alignment_support_scheme
== dr_unaligned_supported
))
7352 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7353 dataref_offset
= build_int_cst (ref_type
, 0);
7356 else if (first_stmt_for_drptr
7357 && first_stmt
!= first_stmt_for_drptr
)
7360 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7361 at_loop
, offset
, &dummy
, gsi
,
7362 &ptr_incr
, simd_lane_access_p
,
7363 &inv_p
, byte_offset
);
7364 /* Adjust the pointer by the difference to first_stmt. */
7365 data_reference_p ptrdr
7366 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7367 tree diff
= fold_convert (sizetype
,
7368 size_binop (MINUS_EXPR
,
7371 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7376 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7377 offset
, &dummy
, gsi
, &ptr_incr
,
7378 simd_lane_access_p
, &inv_p
,
7381 else if (dataref_offset
)
7382 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7383 TYPE_SIZE_UNIT (aggr_type
));
7385 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7386 TYPE_SIZE_UNIT (aggr_type
));
7388 if (grouped_load
|| slp_perm
)
7389 dr_chain
.create (vec_num
);
7391 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7395 vec_array
= create_vector_array (vectype
, vec_num
);
7398 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7399 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7400 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7401 gimple_call_set_lhs (new_stmt
, vec_array
);
7402 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7404 /* Extract each vector into an SSA_NAME. */
7405 for (i
= 0; i
< vec_num
; i
++)
7407 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7409 dr_chain
.quick_push (new_temp
);
7412 /* Record the mapping between SSA_NAMEs and statements. */
7413 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7417 for (i
= 0; i
< vec_num
; i
++)
7420 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7423 /* 2. Create the vector-load in the loop. */
7424 switch (alignment_support_scheme
)
7427 case dr_unaligned_supported
:
7429 unsigned int align
, misalign
;
7432 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7435 : build_int_cst (ref_type
, 0));
7436 align
= TYPE_ALIGN_UNIT (vectype
);
7437 if (alignment_support_scheme
== dr_aligned
)
7439 gcc_assert (aligned_access_p (first_dr
));
7442 else if (DR_MISALIGNMENT (first_dr
) == -1)
7444 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7445 align
= TYPE_ALIGN_UNIT (elem_type
);
7447 align
= (get_object_alignment (DR_REF (first_dr
))
7450 TREE_TYPE (data_ref
)
7451 = build_aligned_type (TREE_TYPE (data_ref
),
7452 align
* BITS_PER_UNIT
);
7456 TREE_TYPE (data_ref
)
7457 = build_aligned_type (TREE_TYPE (data_ref
),
7458 TYPE_ALIGN (elem_type
));
7459 misalign
= DR_MISALIGNMENT (first_dr
);
7461 if (dataref_offset
== NULL_TREE
7462 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7463 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7467 case dr_explicit_realign
:
7471 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7473 if (compute_in_loop
)
7474 msq
= vect_setup_realignment (first_stmt
, gsi
,
7476 dr_explicit_realign
,
7479 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7480 ptr
= copy_ssa_name (dataref_ptr
);
7482 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7483 new_stmt
= gimple_build_assign
7484 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7486 (TREE_TYPE (dataref_ptr
),
7487 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7488 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7490 = build2 (MEM_REF
, vectype
, ptr
,
7491 build_int_cst (ref_type
, 0));
7492 vec_dest
= vect_create_destination_var (scalar_dest
,
7494 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7495 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7496 gimple_assign_set_lhs (new_stmt
, new_temp
);
7497 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7498 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7499 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7502 bump
= size_binop (MULT_EXPR
, vs
,
7503 TYPE_SIZE_UNIT (elem_type
));
7504 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7505 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7506 new_stmt
= gimple_build_assign
7507 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7510 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7511 ptr
= copy_ssa_name (ptr
, new_stmt
);
7512 gimple_assign_set_lhs (new_stmt
, ptr
);
7513 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7515 = build2 (MEM_REF
, vectype
, ptr
,
7516 build_int_cst (ref_type
, 0));
7519 case dr_explicit_realign_optimized
:
7520 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7521 new_temp
= copy_ssa_name (dataref_ptr
);
7523 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7524 new_stmt
= gimple_build_assign
7525 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7527 (TREE_TYPE (dataref_ptr
),
7528 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7529 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7531 = build2 (MEM_REF
, vectype
, new_temp
,
7532 build_int_cst (ref_type
, 0));
7537 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7538 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7539 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7540 gimple_assign_set_lhs (new_stmt
, new_temp
);
7541 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7543 /* 3. Handle explicit realignment if necessary/supported.
7545 vec_dest = realign_load (msq, lsq, realignment_token) */
7546 if (alignment_support_scheme
== dr_explicit_realign_optimized
7547 || alignment_support_scheme
== dr_explicit_realign
)
7549 lsq
= gimple_assign_lhs (new_stmt
);
7550 if (!realignment_token
)
7551 realignment_token
= dataref_ptr
;
7552 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7553 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7554 msq
, lsq
, realignment_token
);
7555 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7556 gimple_assign_set_lhs (new_stmt
, new_temp
);
7557 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7559 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7562 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7563 add_phi_arg (phi
, lsq
,
7564 loop_latch_edge (containing_loop
),
7570 /* 4. Handle invariant-load. */
7571 if (inv_p
&& !bb_vinfo
)
7573 gcc_assert (!grouped_load
);
7574 /* If we have versioned for aliasing or the loop doesn't
7575 have any data dependencies that would preclude this,
7576 then we are sure this is a loop invariant load and
7577 thus we can insert it on the preheader edge. */
7578 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7579 && !nested_in_vect_loop
7580 && hoist_defs_of_uses (stmt
, loop
))
7582 if (dump_enabled_p ())
7584 dump_printf_loc (MSG_NOTE
, vect_location
,
7585 "hoisting out of the vectorized "
7587 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7589 tree tem
= copy_ssa_name (scalar_dest
);
7590 gsi_insert_on_edge_immediate
7591 (loop_preheader_edge (loop
),
7592 gimple_build_assign (tem
,
7594 (gimple_assign_rhs1 (stmt
))));
7595 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7596 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7597 set_vinfo_for_stmt (new_stmt
,
7598 new_stmt_vec_info (new_stmt
, vinfo
));
7602 gimple_stmt_iterator gsi2
= *gsi
;
7604 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7606 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7610 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7612 tree perm_mask
= perm_mask_for_reverse (vectype
);
7613 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7614 perm_mask
, stmt
, gsi
);
7615 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7618 /* Collect vector loads and later create their permutation in
7619 vect_transform_grouped_load (). */
7620 if (grouped_load
|| slp_perm
)
7621 dr_chain
.quick_push (new_temp
);
7623 /* Store vector loads in the corresponding SLP_NODE. */
7624 if (slp
&& !slp_perm
)
7625 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7627 /* Bump the vector pointer to account for a gap or for excess
7628 elements loaded for a permuted SLP load. */
7629 if (group_gap_adj
!= 0)
7633 = wide_int_to_tree (sizetype
,
7634 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7635 group_gap_adj
, &ovf
));
7636 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7641 if (slp
&& !slp_perm
)
7647 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7648 slp_node_instance
, false,
7651 dr_chain
.release ();
7659 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7660 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7661 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7666 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7668 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7669 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7672 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
7692 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
7693 tree
*comp_vectype
, enum vect_def_type
*dts
)
7696 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7699 if (TREE_CODE (cond
) == SSA_NAME
7700 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7702 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7703 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7704 &dts
[0], comp_vectype
)
7706 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7711 if (!COMPARISON_CLASS_P (cond
))
7714 lhs
= TREE_OPERAND (cond
, 0);
7715 rhs
= TREE_OPERAND (cond
, 1);
7717 if (TREE_CODE (lhs
) == SSA_NAME
)
7719 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7720 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
7723 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
7724 || TREE_CODE (lhs
) == FIXED_CST
)
7725 dts
[0] = vect_constant_def
;
7729 if (TREE_CODE (rhs
) == SSA_NAME
)
7731 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7732 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
7735 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
7736 || TREE_CODE (rhs
) == FIXED_CST
)
7737 dts
[1] = vect_constant_def
;
7741 if (vectype1
&& vectype2
7742 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7745 *comp_vectype
= vectype1
? vectype1
: vectype2
;
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7763 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7764 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7767 tree scalar_dest
= NULL_TREE
;
7768 tree vec_dest
= NULL_TREE
;
7769 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7770 tree then_clause
, else_clause
;
7771 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7772 tree comp_vectype
= NULL_TREE
;
7773 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7774 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7777 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7778 enum vect_def_type dts
[4]
7779 = {vect_unknown_def_type
, vect_unknown_def_type
,
7780 vect_unknown_def_type
, vect_unknown_def_type
};
7783 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7784 stmt_vec_info prev_stmt_info
= NULL
;
7786 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7787 vec
<tree
> vec_oprnds0
= vNULL
;
7788 vec
<tree
> vec_oprnds1
= vNULL
;
7789 vec
<tree
> vec_oprnds2
= vNULL
;
7790 vec
<tree
> vec_oprnds3
= vNULL
;
7792 bool masked
= false;
7794 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7797 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7799 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7802 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7803 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7807 /* FORNOW: not yet supported. */
7808 if (STMT_VINFO_LIVE_P (stmt_info
))
7810 if (dump_enabled_p ())
7811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7812 "value used after loop.\n");
7817 /* Is vectorizable conditional operation? */
7818 if (!is_gimple_assign (stmt
))
7821 code
= gimple_assign_rhs_code (stmt
);
7823 if (code
!= COND_EXPR
)
7826 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7827 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7828 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7833 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7835 gcc_assert (ncopies
>= 1);
7836 if (reduc_index
&& ncopies
> 1)
7837 return false; /* FORNOW */
7839 cond_expr
= gimple_assign_rhs1 (stmt
);
7840 then_clause
= gimple_assign_rhs2 (stmt
);
7841 else_clause
= gimple_assign_rhs3 (stmt
);
7843 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
7844 &comp_vectype
, &dts
[0])
7849 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
7852 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
7856 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7859 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7862 masked
= !COMPARISON_CLASS_P (cond_expr
);
7863 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7865 if (vec_cmp_type
== NULL_TREE
)
7868 cond_code
= TREE_CODE (cond_expr
);
7871 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
7872 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
7875 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7877 /* Boolean values may have another representation in vectors
7878 and therefore we prefer bit operations over comparison for
7879 them (which also works for scalar masks). We store opcodes
7880 to use in bitop1 and bitop2. Statement is vectorized as
7881 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7882 depending on bitop1 and bitop2 arity. */
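      /* For example (matching the mapping chosen below, and only meaningful
	 for boolean operands): a > b becomes a & ~b (bitop1 = BIT_NOT_EXPR,
	 bitop2 = BIT_AND_EXPR), a >= b becomes a | ~b, a == b becomes
	 ~(a ^ b), and a != b becomes a ^ b.  */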
7886 bitop1
= BIT_NOT_EXPR
;
7887 bitop2
= BIT_AND_EXPR
;
7890 bitop1
= BIT_NOT_EXPR
;
7891 bitop2
= BIT_IOR_EXPR
;
7894 bitop1
= BIT_NOT_EXPR
;
7895 bitop2
= BIT_AND_EXPR
;
7896 std::swap (cond_expr0
, cond_expr1
);
7899 bitop1
= BIT_NOT_EXPR
;
7900 bitop2
= BIT_IOR_EXPR
;
7901 std::swap (cond_expr0
, cond_expr1
);
7904 bitop1
= BIT_XOR_EXPR
;
7907 bitop1
= BIT_XOR_EXPR
;
7908 bitop2
= BIT_NOT_EXPR
;
7913 cond_code
= SSA_NAME
;
7918 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7919 if (bitop1
!= NOP_EXPR
)
7921 machine_mode mode
= TYPE_MODE (comp_vectype
);
7924 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
7925 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7928 if (bitop2
!= NOP_EXPR
)
7930 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
7932 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7936 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
7939 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
7949 vec_oprnds0
.create (1);
7950 vec_oprnds1
.create (1);
7951 vec_oprnds2
.create (1);
7952 vec_oprnds3
.create (1);
7956 scalar_dest
= gimple_assign_lhs (stmt
);
7957 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7959 /* Handle cond expr. */
7960 for (j
= 0; j
< ncopies
; j
++)
7962 gassign
*new_stmt
= NULL
;
7967 auto_vec
<tree
, 4> ops
;
7968 auto_vec
<vec
<tree
>, 4> vec_defs
;
7971 ops
.safe_push (cond_expr
);
7974 ops
.safe_push (cond_expr0
);
7975 ops
.safe_push (cond_expr1
);
7977 ops
.safe_push (then_clause
);
7978 ops
.safe_push (else_clause
);
7979 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7980 vec_oprnds3
= vec_defs
.pop ();
7981 vec_oprnds2
= vec_defs
.pop ();
7983 vec_oprnds1
= vec_defs
.pop ();
7984 vec_oprnds0
= vec_defs
.pop ();
7992 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7994 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8000 = vect_get_vec_def_for_operand (cond_expr0
,
8001 stmt
, comp_vectype
);
8002 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8005 = vect_get_vec_def_for_operand (cond_expr1
,
8006 stmt
, comp_vectype
);
8007 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8009 if (reduc_index
== 1)
8010 vec_then_clause
= reduc_def
;
8013 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8015 vect_is_simple_use (then_clause
, loop_vinfo
,
8018 if (reduc_index
== 2)
8019 vec_else_clause
= reduc_def
;
8022 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8024 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8031 = vect_get_vec_def_for_stmt_copy (dts
[0],
8032 vec_oprnds0
.pop ());
8035 = vect_get_vec_def_for_stmt_copy (dts
[1],
8036 vec_oprnds1
.pop ());
8038 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8039 vec_oprnds2
.pop ());
8040 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8041 vec_oprnds3
.pop ());
8046 vec_oprnds0
.quick_push (vec_cond_lhs
);
8048 vec_oprnds1
.quick_push (vec_cond_rhs
);
8049 vec_oprnds2
.quick_push (vec_then_clause
);
8050 vec_oprnds3
.quick_push (vec_else_clause
);
8053 /* Arguments are ready. Create the new vector stmt. */
8054 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8056 vec_then_clause
= vec_oprnds2
[i
];
8057 vec_else_clause
= vec_oprnds3
[i
];
8060 vec_compare
= vec_cond_lhs
;
8063 vec_cond_rhs
= vec_oprnds1
[i
];
8064 if (bitop1
== NOP_EXPR
)
8065 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8066 vec_cond_lhs
, vec_cond_rhs
);
8069 new_temp
= make_ssa_name (vec_cmp_type
);
8070 if (bitop1
== BIT_NOT_EXPR
)
8071 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8075 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8077 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8078 if (bitop2
== NOP_EXPR
)
8079 vec_compare
= new_temp
;
8080 else if (bitop2
== BIT_NOT_EXPR
)
8082 /* Instead of doing ~x ? y : z do x ? z : y. */
8083 vec_compare
= new_temp
;
8084 std::swap (vec_then_clause
, vec_else_clause
);
8088 vec_compare
= make_ssa_name (vec_cmp_type
);
8090 = gimple_build_assign (vec_compare
, bitop2
,
8091 vec_cond_lhs
, new_temp
);
8092 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8096 new_temp
= make_ssa_name (vec_dest
);
8097 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8098 vec_compare
, vec_then_clause
,
8100 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8102 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8109 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8111 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8113 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8116 vec_oprnds0
.release ();
8117 vec_oprnds1
.release ();
8118 vec_oprnds2
.release ();
8119 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
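/* For example (illustrative): a scalar statement like

     _Bool m = a < b;

   whose operands are vectorized to V4SI values becomes a single vector
   comparison producing a four-element boolean (mask) vector, which can in
   turn feed the VEC_COND_EXPR built by vectorizable_condition above.  */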
8133 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8134 gimple
**vec_stmt
, tree reduc_def
,
8137 tree lhs
, rhs1
, rhs2
;
8138 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8139 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8140 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8141 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8143 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8144 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8148 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8149 stmt_vec_info prev_stmt_info
= NULL
;
8151 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8152 vec
<tree
> vec_oprnds0
= vNULL
;
8153 vec
<tree
> vec_oprnds1
= vNULL
;
8158 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8161 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8164 mask_type
= vectype
;
8165 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8170 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
8172 gcc_assert (ncopies
>= 1);
8173 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8174 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8178 if (STMT_VINFO_LIVE_P (stmt_info
))
8180 if (dump_enabled_p ())
8181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8182 "value used after loop.\n");
8186 if (!is_gimple_assign (stmt
))
8189 code
= gimple_assign_rhs_code (stmt
);
8191 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8194 rhs1
= gimple_assign_rhs1 (stmt
);
8195 rhs2
= gimple_assign_rhs2 (stmt
);
8197 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8198 &dts
[0], &vectype1
))
8201 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8202 &dts
[1], &vectype2
))
8205 if (vectype1
&& vectype2
8206 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
8209 vectype
= vectype1
? vectype1
: vectype2
;
8211 /* Invariant comparison. */
8214 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8215 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8218 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8221 /* Can't compare mask and non-mask types. */
8222 if (vectype1
&& vectype2
8223 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8226 /* Boolean values may have another representation in vectors
8227 and therefore we prefer bit operations over comparison for
8228 them (which also works for scalar masks). We store opcodes
8229 to use in bitop1 and bitop2. Statement is vectorized as
8230 BITOP2 (rhs1 BITOP1 rhs2) or
8231 rhs1 BITOP2 (BITOP1 rhs2)
8232 depending on bitop1 and bitop2 arity. */
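  /* For instance, with boolean operands a and b, a > b is emitted as
     a & ~b (bitop1 = BIT_NOT_EXPR applied to b, bitop2 = BIT_AND_EXPR),
     a >= b as a | ~b, a == b as ~(a ^ b) and a != b as a ^ b.  */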
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
        {
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
        }
      else if (code == GE_EXPR)
        {
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
        }
      else if (code == LT_EXPR)
        {
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
          std::swap (rhs1, rhs2);
          std::swap (dts[0], dts[1]);
        }
      else if (code == LE_EXPR)
        {
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
          std::swap (rhs1, rhs2);
          std::swap (dts[0], dts[1]);
        }
      else
        {
          bitop1 = BIT_XOR_EXPR;
          if (code == EQ_EXPR)
            bitop2 = BIT_NOT_EXPR;
        }
    }

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
                              dts, ndts, NULL, NULL);
      if (bitop1 == NOP_EXPR)
        return expand_vec_cmp_expr_p (vectype, mask_type, code);
      else
        {
          machine_mode mode = TYPE_MODE (vectype);
          optab optab;

          optab = optab_for_tree_code (bitop1, vectype, optab_default);
          if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
            return false;

          if (bitop2 != NOP_EXPR)
            {
              optab = optab_for_tree_code (bitop2, vectype, optab_default);
              if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
                return false;
            }
          return true;
        }
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 2> ops;
              auto_vec<vec<tree>, 2> vec_defs;

              ops.safe_push (rhs1);
              ops.safe_push (rhs2);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
            }
          else
            {
              vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
              vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
            }
        }
      else
        {
          vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
                                                     vec_oprnds0.pop ());
          vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
                                                     vec_oprnds1.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
        {
          vec_rhs2 = vec_oprnds1[i];

          new_temp = make_ssa_name (mask);
          if (bitop1 == NOP_EXPR)
            {
              new_stmt = gimple_build_assign (new_temp, code,
                                              vec_rhs1, vec_rhs2);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
            }
          else
            {
              if (bitop1 == BIT_NOT_EXPR)
                new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
              else
                new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
                                                vec_rhs2);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (bitop2 != NOP_EXPR)
                {
                  tree res = make_ssa_name (mask);
                  if (bitop2 == BIT_NOT_EXPR)
                    new_stmt = gimple_build_assign (res, bitop2, new_temp);
                  else
                    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
                                                    new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
            }
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */
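  /* For example, a widening multiplication

       short a, b;
       int c = (int) a * (int) b;

     may be replaced by pattern recognition with a WIDEN_MULT_EXPR pattern
     statement; when only that pattern statement is relevant, it is the one
     analyzed in place of the original statement below.  */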
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple *pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
                  && (relevance == vect_used_in_outer
                      || relevance == vect_used_in_outer_by_reduction
                      || relevance == vect_used_by_reduction
                      || relevance == vect_unused_in_scope
                      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      /* Memory accesses already got their vector type assigned
         in vect_analyze_data_refs.  */
      if (! STMT_VINFO_DATA_REF (stmt_info))
        {
          scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "get vectype for scalar type: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
              dump_printf (MSG_NOTE, "\n");
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not SLPed: unsupported data-type ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     scalar_type);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
              dump_printf (MSG_NOTE, "\n");
            }

          STMT_VINFO_VECTYPE (stmt_info) = vectype;
        }
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_reduction (stmt, NULL, NULL, node)
          || vectorizable_induction (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
          || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else if (bb_vinfo)
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
          || vectorizable_comparison (stmt, NULL, NULL, NULL, node));

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and there vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
                && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                           vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (slp_node)
    {
      gimple *slp_stmt;
      int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
        {
          stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
          if (STMT_VINFO_LIVE_P (slp_stmt_info)
              && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
            {
              done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
                                                  &vec_stmt);
              gcc_assert (done);
            }
        }
    }
  else if (STMT_VINFO_LIVE_P (stmt_info)
           && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);

      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info. */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info. */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                {
                  gimple *seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
                  lhs = gimple_get_lhs (seq_stmt);
                  if (lhs && TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
                }
            }
          free_stmt_vec_info (patt_stmt);
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  tree orig_scalar_type = scalar_type;
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their types precision we use a element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we felt back to using the mode fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.   Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits < 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
             (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
                                  current_vector_size);
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
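/* For example, in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the value loaded from b[i] is defined by a statement in the current
   iteration (vect_internal_def), the loop invariant x is vect_external_def,
   and a literal constant operand would be vect_constant_def.  */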
bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
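/* For example, widening chars to ints when the target only provides
   QI->HI and HI->SI unpacking (VEC_UNPACK_LO/HI_EXPR) goes through the
   intermediate short vector type: CODE1/CODE2 describe the first unpack
   step, MULTI_STEP_CVT is 1, and INTERM_TYPES contains that short
   vector type.  */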
bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow to change the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such an example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).   */
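/* For example, narrowing ints to chars when the target only provides
   SI->HI and HI->QI packing takes two VEC_PACK_TRUNC_EXPR steps through
   the intermediate short vector type: CODE1 is VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT is 1, and INTERM_TYPES contains that short vector
   type.  */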
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}