1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
35 #include "gimple-pretty-print.h"
36 #include "internal-fn.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
42 #include "tree-ssa-loop-manip.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-scalar-evolution.h"
47 #include "insn-config.h"
48 #include "recog.h" /* FIXME: for insn_data */
49 #include "insn-codes.h"
50 #include "optabs-tree.h"
51 #include "diagnostic-core.h"
52 #include "tree-vectorizer.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
62 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
/* Trivial accessor: forwards the vector type already recorded in the
   statement's vec-info by the analysis phase.  */
64 return STMT_VINFO_VECTYPE (stmt_info
);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
70 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
72 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
73 basic_block bb
= gimple_bb (stmt
);
74 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* NOTE(review): when loop_vinfo is NULL (basic-block vectorization) the
   elided lines between here presumably return false early — confirm
   against the full source.  */
80 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* The stmt is "inner" iff its containing basic block belongs directly
   to the single inner loop of the loop being vectorized.  */
82 return (bb
->loop_father
== loop
->inner
);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
90 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
91 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
92 int misalign
, enum vect_cost_model_location where
)
/* Deferred path: stash the cost entry in BODY_COST_VEC for later
   processing.  STMT_INFO may be NULL, in which case no vectype/stmt is
   attached to the entry.  */
96 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
97 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
98 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
/* Preliminary estimate: per-stmt target cost scaled by COUNT.  */
101 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
/* Immediate path: feed the cost straight into the target's cost-model
   data attached to the enclosing vec-info.  */
105 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
106 count
, kind
, stmt_info
, misalign
, where
);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
112 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
/* Build the array type and materialize it as a fresh temporary
   variable (name argument elided in this view).  */
114 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT and the vector is associated
121 with scalar destination SCALAR_DEST. */
124 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
125 tree array
, unsigned HOST_WIDE_INT n
)
127 tree vect_type
, vect
, vect_name
, array_ref
;
/* Sanity check: ARRAY really is an array object; its element type is
   the vector type we will read out.  */
130 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
131 vect_type
= TREE_TYPE (TREE_TYPE (array
));
132 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
/* Form ARRAY[N]; the two trailing NULL_TREE operands of build4 are the
   (unused) element-size and offset operands of an ARRAY_REF.  */
133 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
134 build_int_cst (size_type_node
, n
),
135 NULL_TREE
, NULL_TREE
);
/* Assign the array element into a fresh SSA name and emit the
   assignment at GSI as part of STMT's vectorization.  */
137 new_stmt
= gimple_build_assign (vect
, array_ref
);
138 vect_name
= make_ssa_name (vect
, new_stmt
);
139 gimple_assign_set_lhs (new_stmt
, vect_name
);
140 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT. */
150 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
151 tree array
, unsigned HOST_WIDE_INT n
)
/* Build ARRAY[N] as the store destination; trailing NULL_TREE operands
   are the unused ARRAY_REF element-size/offset slots.  */
156 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
157 build_int_cst (size_type_node
, n
),
158 NULL_TREE
, NULL_TREE
);
/* Emit ARRAY[N] = VECT at GSI.  */
160 new_stmt
= gimple_build_assign (array_ref
, vect
);
161 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
169 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
171 tree mem_ref
, alias_ptr_type
;
/* Inherit the alias set from the original data reference so the new
   MEM_REF conflicts with exactly what FIRST_DR did.  */
173 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
174 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
175 /* Arrays have the same alignment as their type. */
176 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
180 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
182 /* Function vect_mark_relevant.
184 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
187 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
188 enum vect_relevant relevant
, bool live_p
,
189 bool used_in_pattern
)
191 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Remember the current markings so we can tell below whether this call
   actually changed anything (if not, STMT need not be re-queued).  */
192 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
193 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
194 gimple
*pattern_stmt
;
196 if (dump_enabled_p ())
197 dump_printf_loc (MSG_NOTE
, vect_location
,
198 "mark relevant %d, live %d.\n", relevant
, live_p
);
200 /* If this stmt is an original stmt in a pattern, we might need to mark its
201 related pattern stmt instead of the original stmt. However, such stmts
202 may have their own uses that are not in any pattern, in such cases the
203 stmt itself should be marked. */
204 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
207 if (!used_in_pattern
)
209 imm_use_iterator imm_iter
;
213 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
214 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* Grab the defined value (lhs) of STMT, whether it is an assignment
   or a call, so we can walk its immediate uses below.  */
216 if (is_gimple_assign (stmt
))
217 lhs
= gimple_assign_lhs (stmt
);
219 lhs
= gimple_call_lhs (stmt
);
221 /* This use is out of pattern use, if LHS has other uses that are
222 pattern uses, we should mark the stmt itself, and not the pattern
224 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
225 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
/* Debug uses and uses outside the loop don't count when deciding
   whether STMT has genuine out-of-pattern uses.  */
227 if (is_gimple_debug (USE_STMT (use_p
)))
229 use_stmt
= USE_STMT (use_p
);
231 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
234 if (vinfo_for_stmt (use_stmt
)
235 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
245 /* This is the last stmt in a sequence that was detected as a
246 pattern that can potentially be vectorized. Don't mark the stmt
247 as relevant/live because it's not going to be vectorized.
248 Instead mark the pattern-stmt that replaces it. */
250 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
252 if (dump_enabled_p ())
253 dump_printf_loc (MSG_NOTE
, vect_location
,
254 "last stmt in pattern. don't mark"
255 " relevant/live.\n");
/* Redirect all further processing at the pattern stmt's vec-info,
   re-capturing its saved relevance/liveness.  */
256 stmt_info
= vinfo_for_stmt (pattern_stmt
);
257 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
258 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
259 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Merge in the new markings: liveness is sticky (|=), relevance is a
   max over the enum's ordering.  */
264 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
265 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
266 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* Nothing changed -> the stmt was already processed with at least this
   marking; do not push it onto the worklist again.  */
268 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
269 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
271 if (dump_enabled_p ())
272 dump_printf_loc (MSG_NOTE
, vect_location
,
273 "already marked relevant/live.\n");
277 worklist
->safe_push (stmt
);
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT in loop that is represented by LOOP_VINFO is
284 "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - control stmts in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
294 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
295 enum vect_relevant
*relevant
, bool *live_p
)
297 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
299 imm_use_iterator imm_iter
;
/* Default: not relevant (the elided lines presumably also clear
   *live_p — TODO confirm against the full source).  */
303 *relevant
= vect_unused_in_scope
;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt
)
308 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
309 != loop_exit_ctrl_vec_info_type
)
310 *relevant
= vect_used_in_scope
;
312 /* changing memory. */
313 if (gimple_code (stmt
) != GIMPLE_PHI
)
/* A vdef means STMT writes memory; clobbers are explicitly ignored
   since they only mark end-of-life, not real stores.  */
314 if (gimple_vdef (stmt
)
315 && !gimple_clobber_p (stmt
))
317 if (dump_enabled_p ())
318 dump_printf_loc (MSG_NOTE
, vect_location
,
319 "vec_stmt_relevant_p: stmt has vdefs.\n");
320 *relevant
= vect_used_in_scope
;
323 /* uses outside the loop. */
324 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
326 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
328 basic_block bb
= gimple_bb (USE_STMT (use_p
));
329 if (!flow_bb_inside_loop_p (loop
, bb
))
331 if (dump_enabled_p ())
332 dump_printf_loc (MSG_NOTE
, vect_location
,
333 "vec_stmt_relevant_p: used out of loop.\n");
/* Debug stmts never make a value live.  */
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 /* We expect all such uses to be in the loop exit phis
339 (because of loop closed form) */
340 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
341 gcc_assert (bb
== single_exit (loop
)->dest
);
/* STMT matters iff it is live (used after the loop) or relevant
   (participates in in-loop computation).  */
348 return (*live_p
|| *relevant
);
352 /* Function exist_non_indexing_operands_for_use_p
354 USE is one of the uses attached to STMT. Check if USE is
355 used in STMT for anything other than indexing an array. */
358 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
361 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
)
;
363 /* USE corresponds to some operand in STMT. If there is no data
364 reference in STMT, then any operand that corresponds to USE
365 is not indexing an array. */
366 if (!STMT_VINFO_DATA_REF (stmt_info
))
369 /* STMT has a data_ref. FORNOW this means that its of one of
373 (This should have been verified in analyze_data_refs).
375 'var' in the second case corresponds to a def, not a use,
376 so USE cannot correspond to any operands that are not used
379 Therefore, all we need to check is if STMT falls into the
380 first case, and whether var corresponds to USE. */
382 if (!gimple_assign_copy_p (stmt
))
/* Internal calls (masked load/store — the case labels are elided in
   this view) carry the stored/condition value at a fixed argument
   position; extract it as the candidate non-indexing operand.  */
384 if (is_gimple_call (stmt
)
385 && gimple_call_internal_p (stmt
))
386 switch (gimple_call_internal_fn (stmt
))
389 operand
= gimple_call_arg (stmt
, 3);
394 operand
= gimple_call_arg (stmt
, 2);
/* Plain copy with SSA lhs: USE is non-indexing iff it is the copied
   rhs value itself.  */
404 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
406 operand
= gimple_assign_rhs1 (stmt
);
407 if (TREE_CODE (operand
) != SSA_NAME
)
418 Function process_use.
421 - a USE in STMT in a loop represented by LOOP_VINFO
422 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
423 that defined USE. This is done by calling mark_relevant and passing it
424 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
425 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
429 Generally, LIVE_P and RELEVANT are used to define the liveness and
430 relevance info of the DEF_STMT of this USE:
431 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
432 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
434 - case 1: If USE is used only for address computations (e.g. array indexing),
435 which does not need to be directly vectorized, then the liveness/relevance
436 of the respective DEF_STMT is left unchanged.
437 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
438 skip DEF_STMT cause it had already been processed.
439 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
440 be modified accordingly.
442 Return true if everything is as expected. Return false otherwise. */
445 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
446 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
449 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
450 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
451 stmt_vec_info dstmt_vinfo
;
452 basic_block bb
, def_bb
;
455 enum vect_def_type dt
;
457 /* case 1: we are only interested in uses that need to be vectorized. Uses
458 that are used for address computation are not considered relevant. */
459 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
/* Resolve USE's definition; an unanalyzable use makes the whole loop
   unvectorizable.  */
462 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, &def_stmt
, &def
, &dt
))
464 if (dump_enabled_p ())
465 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
466 "not vectorized: unsupported use in stmt.\n");
/* No defining stmt (e.g. a default definition): nothing to mark.  */
470 if (!def_stmt
|| gimple_nop_p (def_stmt
))
473 def_bb
= gimple_bb (def_stmt
);
/* Definitions from outside the loop (invariants) need no marking.  */
474 if (!flow_bb_inside_loop_p (loop
, def_bb
))
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
481 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
482 DEF_STMT must have already been processed, because this should be the
483 only way that STMT, which is a reduction-phi, was put in the worklist,
484 as there should be no other uses for DEF_STMT in the loop. So we just
485 check that everything is as expected, and we are done. */
486 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
487 bb
= gimple_bb (stmt
);
488 if (gimple_code (stmt
) == GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
490 && gimple_code (def_stmt
) != GIMPLE_PHI
491 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
492 && bb
->loop_father
== def_bb
->loop_father
)
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_NOTE
, vect_location
,
496 "reduc-stmt defining reduc-phi in the same nest.\n");
/* If the reduction stmt was replaced by a pattern, check the pattern
   stmt's markings instead.  */
497 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
498 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
499 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
500 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
501 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
505 /* case 3a: outer-loop stmt defining an inner-loop stmt:
506 outer-loop-header-bb:
/* def is in an outer loop relative to STMT: translate the relevance
   value into its in-scope equivalent for the outer def.  */
512 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
514 if (dump_enabled_p ())
515 dump_printf_loc (MSG_NOTE
, vect_location
,
516 "outer-loop def-stmt defining inner-loop stmt.\n");
520 case vect_unused_in_scope
:
521 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
522 vect_used_in_scope
: vect_unused_in_scope
;
525 case vect_used_in_outer_by_reduction
:
526 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
527 relevant
= vect_used_by_reduction
;
530 case vect_used_in_outer
:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
532 relevant
= vect_used_in_scope
;
535 case vect_used_in_scope
:
543 /* case 3b: inner-loop stmt defining an outer-loop stmt:
544 outer-loop-header-bb:
548 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
/* def is in an inner loop relative to STMT: translate relevance into
   the corresponding "used in outer" values.  */
550 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
552 if (dump_enabled_p ())
553 dump_printf_loc (MSG_NOTE
, vect_location
,
554 "inner-loop def-stmt defining outer-loop stmt.\n");
558 case vect_unused_in_scope
:
559 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
560 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
561 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
564 case vect_used_by_reduction
:
565 relevant
= vect_used_in_outer_by_reduction
;
568 case vect_used_in_scope
:
569 relevant
= vect_used_in_outer
;
/* Finally propagate the (possibly adjusted) markings to DEF_STMT and
   queue it for processing.  */
577 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
578 is_pattern_stmt_p (stmt_vinfo
));
583 /* Function vect_mark_stmts_to_be_vectorized.
585 Not all stmts in the loop need to be vectorized. For example:
594 Stmt 1 and 3 do not need to be vectorized, because loop control and
595 addressing of vectorized data-refs are handled differently.
597 This pass detects such stmts. */
600 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
602 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
603 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
604 unsigned int nbbs
= loop
->num_nodes
;
605 gimple_stmt_iterator si
;
608 stmt_vec_info stmt_vinfo
;
612 enum vect_relevant relevant
, tmp_relevant
;
613 enum vect_def_type def_type
;
615 if (dump_enabled_p ())
616 dump_printf_loc (MSG_NOTE
, vect_location
,
617 "=== vect_mark_stmts_to_be_vectorized ===\n");
/* Worklist of stmts whose relevance still has to be propagated to the
   stmts that feed them; 64 slots are preallocated on the stack.  */
619 auto_vec
<gimple
*, 64> worklist
;
621 /* 1. Init worklist. */
622 for (i
= 0; i
< nbbs
; i
++)
/* Seed the worklist with every phi and stmt that is intrinsically
   relevant (control flow, memory stores, values live after the loop).  */
625 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
628 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
631 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
634 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt
= gsi_stmt (si
);
640 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
643 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
646 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
647 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
651 /* 2. Process_worklist */
652 while (worklist
.length () > 0)
657 stmt
= worklist
.pop ();
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
661 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
664 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
665 (DEF_STMT) as relevant/irrelevant and live/dead according to the
666 liveness and relevance properties of STMT. */
667 stmt_vinfo
= vinfo_for_stmt (stmt
);
668 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
669 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
671 /* Generally, the liveness and relevance properties of STMT are
672 propagated as is to the DEF_STMTs of its USEs:
673 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
674 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
676 One exception is when STMT has been identified as defining a reduction
677 variable; in this case we set the liveness/relevance as follows:
679 relevant = vect_used_by_reduction
680 This is because we distinguish between two kinds of relevant stmts -
681 those that are used by a reduction computation, and those that are
682 (also) used by a regular computation. This allows us later on to
683 identify stmts that are used solely by a reduction, and therefore the
684 order of the results that they produce does not have to be kept. */
686 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
687 tmp_relevant
= relevant
;
/* Adjust the relevance to propagate according to STMT's def-type;
   invalid combinations make the loop unvectorizable.  */
690 case vect_reduction_def
:
691 switch (tmp_relevant
)
693 case vect_unused_in_scope
:
694 relevant
= vect_used_by_reduction
;
697 case vect_used_by_reduction
:
698 if (gimple_code (stmt
) == GIMPLE_PHI
)
703 if (dump_enabled_p ())
704 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
705 "unsupported use of reduction.\n");
712 case vect_nested_cycle
:
713 if (tmp_relevant
!= vect_unused_in_scope
714 && tmp_relevant
!= vect_used_in_outer_by_reduction
715 && tmp_relevant
!= vect_used_in_outer
)
717 if (dump_enabled_p ())
718 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
719 "unsupported use of nested cycle.\n");
727 case vect_double_reduction_def
:
728 if (tmp_relevant
!= vect_unused_in_scope
729 && tmp_relevant
!= vect_used_by_reduction
)
731 if (dump_enabled_p ())
732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
733 "unsupported use of double reduction.\n");
745 if (is_pattern_stmt_p (stmt_vinfo
))
747 /* Pattern statements are not inserted into the code, so
748 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
749 have to scan the RHS or function arguments instead. */
750 if (is_gimple_assign (stmt
))
752 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
753 tree op
= gimple_assign_rhs1 (stmt
);
/* COND_EXPR with an embedded comparison: process both comparison
   operands explicitly, then fall through to the remaining ops.  */
756 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
758 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
759 live_p
, relevant
, &worklist
, false)
760 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
761 live_p
, relevant
, &worklist
, false))
765 for (; i
< gimple_num_ops (stmt
); i
++)
767 op
= gimple_op (stmt
, i
);
768 if (TREE_CODE (op
) == SSA_NAME
769 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
774 else if (is_gimple_call (stmt
))
776 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
778 tree arg
= gimple_call_arg (stmt
, i
);
779 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
/* Non-pattern stmts: the regular SSA use iterator works.  */
786 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
788 tree op
= USE_FROM_PTR (use_p
);
789 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
/* Gather/scatter offsets are real (vectorized) uses even though they
   look like address computation, so process them with FORCE.  */
794 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
797 tree decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
799 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
803 } /* while worklist */
809 /* Function vect_model_simple_cost.
811 Models cost for simple operations, i.e. those that only emit ncopies of a
812 single op. Right now, this does not account for multiple insns that could
813 be generated for the single vector op. We will handle that shortly. */
816 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
817 enum vect_def_type
*dt
,
818 stmt_vector_for_cost
*prologue_cost_vec
,
819 stmt_vector_for_cost
*body_cost_vec
)
822 int inside_cost
= 0, prologue_cost
= 0;
824 /* The SLP costs were already calculated during SLP tree build. */
825 if (PURE_SLP_STMT (stmt_info
))
828 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constants and loop invariants are materialized once, before the
   loop, so they contribute to the prologue cost.  */
829 for (i
= 0; i
< 2; i
++)
830 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
831 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
832 stmt_info
, 0, vect_prologue
);
834 /* Pass the inside-of-loop statements to the target-specific cost model. */
835 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
836 stmt_info
, 0, vect_body
);
838 if (dump_enabled_p ())
839 dump_printf_loc (MSG_NOTE
, vect_location
,
840 "vect_model_simple_cost: inside_cost = %d, "
841 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
845 /* Model cost for type demotion and promotion operations. PWR is normally
846 zero for single-step promotions and demotions. It will be one if
847 two-step promotion/demotion is required, and so on. Each additional
848 step doubles the number of instructions required. */
851 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
852 enum vect_def_type
*dt
, int pwr
)
855 int inside_cost
= 0, prologue_cost
= 0;
856 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
857 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
858 void *target_cost_data
;
860 /* The SLP costs were already calculated during SLP tree build. */
861 if (PURE_SLP_STMT (stmt_info
))
/* Pick the cost-model state of whichever vectorization context
   (loop or basic-block) this stmt belongs to.  */
865 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
867 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
/* Each extra step of a multi-step promotion/demotion doubles the
   instruction count, hence the vect_pow2 scaling.  */
869 for (i
= 0; i
< pwr
+ 1; i
++)
871 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
873 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
874 vec_promote_demote
, stmt_info
, 0,
878 /* FORNOW: Assuming maximum 2 args per stmts. */
879 for (i
= 0; i
< 2; i
++)
880 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
881 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
882 stmt_info
, 0, vect_prologue
);
884 if (dump_enabled_p ())
885 dump_printf_loc (MSG_NOTE
, vect_location
,
886 "vect_model_promotion_demotion_cost: inside_cost = %d, "
887 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
890 /* Function vect_cost_group_size
892 For grouped load or store, return the group_size only if it is the first
893 load or store of a group, else return 1. This ensures that group size is
894 only returned once per group. */
897 vect_cost_group_size (stmt_vec_info stmt_info
)
899 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
/* Only the group leader reports the full group size; members report 1
   (the fall-through return is elided in this view).  */
901 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
902 return GROUP_SIZE (stmt_info
);
908 /* Function vect_model_store_cost
910 Models cost for stores. In the case of grouped accesses, one access
911 has the overhead of the grouped access attributed to it. */
914 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
915 bool store_lanes_p
, enum vect_def_type dt
,
917 stmt_vector_for_cost
*prologue_cost_vec
,
918 stmt_vector_for_cost
*body_cost_vec
)
921 unsigned int inside_cost
= 0, prologue_cost
= 0;
922 struct data_reference
*first_dr
;
/* A constant or invariant stored value must first be broadcast into a
   vector, once, in the prologue.  */
925 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
926 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
927 stmt_info
, 0, vect_prologue
);
929 /* Grouped access? */
930 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* SLP case: the group leader is the first scalar stmt of the SLP node;
   non-SLP case: query the group element chain.  */
934 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
939 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
940 group_size
= vect_cost_group_size (stmt_info
);
943 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
945 /* Not a grouped access. */
949 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
952 /* We assume that the cost of a single store-lanes instruction is
953 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
954 access is instead being provided by a permute-and-store operation,
955 include the cost of the permutes. */
956 if (!store_lanes_p
&& group_size
> 1
957 && !STMT_VINFO_STRIDED_P (stmt_info
))
959 /* Uses a high and low interleave or shuffle operations for each
961 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
962 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
963 stmt_info
, 0, vect_body
);
965 if (dump_enabled_p ())
966 dump_printf_loc (MSG_NOTE
, vect_location
,
967 "vect_model_store_cost: strided group_size = %d .\n",
971 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
972 /* Costs of the stores. */
973 if (STMT_VINFO_STRIDED_P (stmt_info
)
974 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
976 /* N scalar stores plus extracting the elements. */
977 inside_cost
+= record_stmt_cost (body_cost_vec
,
978 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
979 scalar_store
, stmt_info
, 0, vect_body
);
/* Contiguous case: delegate the per-access cost to the alignment-aware
   helper.  */
982 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
/* Strided stores additionally pay for extracting each element out of
   the vector before storing it.  */
984 if (STMT_VINFO_STRIDED_P (stmt_info
))
985 inside_cost
+= record_stmt_cost (body_cost_vec
,
986 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
987 vec_to_scalar
, stmt_info
, 0, vect_body
);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE
, vect_location
,
991 "vect_model_store_cost: inside_cost = %d, "
992 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
996 /* Calculate cost of DR's memory access. */
998 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
999 unsigned int *inside_cost
,
1000 stmt_vector_for_cost
*body_cost_vec
)
1002 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1003 gimple
*stmt
= DR_STMT (dr
);
1004 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Accumulate into *INSIDE_COST according to how the target supports
   this access's alignment.  */
1006 switch (alignment_support_scheme
)
/* Aligned: one plain vector store per copy.  */
1010 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1011 vector_store
, stmt_info
, 0,
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE
, vect_location
,
1016 "vect_model_store_cost: aligned.\n");
1020 case dr_unaligned_supported
:
1022 /* Here, we assign an additional cost for the unaligned store. */
1023 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1024 unaligned_store
, stmt_info
,
1025 DR_MISALIGNMENT (dr
), vect_body
);
1026 if (dump_enabled_p ())
1027 dump_printf_loc (MSG_NOTE
, vect_location
,
1028 "vect_model_store_cost: unaligned supported by "
1033 case dr_unaligned_unsupported
:
/* Unsupported: pin the cost at the maximum so this strategy is never
   chosen by the cost model.  */
1035 *inside_cost
= VECT_MAX_COST
;
1037 if (dump_enabled_p ())
1038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1039 "vect_model_store_cost: unsupported access.\n");
1049 /* Function vect_model_load_cost
1051 Models cost for loads. In the case of grouped accesses, the last access
1052 has the overhead of the grouped access attributed to it. Since unaligned
1053 accesses are supported for loads, we also account for the costs of the
1054 access scheme chosen. */
1057 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1058 bool load_lanes_p
, slp_tree slp_node
,
1059 stmt_vector_for_cost
*prologue_cost_vec
,
1060 stmt_vector_for_cost
*body_cost_vec
)
1064 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1065 unsigned int inside_cost
= 0, prologue_cost
= 0;
1067 /* Grouped accesses? */
1068 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
/* Non-SLP grouped load: charge the whole-group overhead through the
   group leader's data reference.  */
1069 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1071 group_size
= vect_cost_group_size (stmt_info
);
1072 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1074 /* Not a grouped access. */
1081 /* We assume that the cost of a single load-lanes instruction is
1082 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1083 access is instead being provided by a load-and-permute operation,
1084 include the cost of the permutes. */
1085 if (!load_lanes_p
&& group_size
> 1
1086 && !STMT_VINFO_STRIDED_P (stmt_info
))
1088 /* Uses an even and odd extract operations or shuffle operations
1089 for each needed permute. */
1090 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1091 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1092 stmt_info
, 0, vect_body
);
1094 if (dump_enabled_p ())
1095 dump_printf_loc (MSG_NOTE
, vect_location
,
1096 "vect_model_load_cost: strided group_size = %d .\n",
1100 /* The loads themselves. */
1101 if (STMT_VINFO_STRIDED_P (stmt_info
)
1102 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1104 /* N scalar loads plus gathering them into a vector. */
1105 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1106 inside_cost
+= record_stmt_cost (body_cost_vec
,
1107 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1108 scalar_load
, stmt_info
, 0, vect_body
);
/* Contiguous case: alignment-aware helper; realignment overhead is
   charged here only when this stmt represents the whole group.  */
1111 vect_get_load_cost (first_dr
, ncopies
,
1112 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1113 || group_size
> 1 || slp_node
),
1114 &inside_cost
, &prologue_cost
,
1115 prologue_cost_vec
, body_cost_vec
, true);
/* Strided loads pay for assembling the scalars into a vector.  */
1116 if (STMT_VINFO_STRIDED_P (stmt_info
))
1117 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1118 stmt_info
, 0, vect_body
);
1120 if (dump_enabled_p ())
1121 dump_printf_loc (MSG_NOTE
, vect_location
,
1122 "vect_model_load_cost: inside_cost = %d, "
1123 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): this file is a damaged extraction — original lines are split
   across physical lines and some lines (braces, `case dr_aligned:', breaks,
   the `default:' arm) are missing; code below is preserved byte-identically,
   only comments were added.  */
1127 /* Calculate cost of DR's memory access. */
/* Accumulate the inside-loop and prologue cost of loading data reference DR
   NCOPIES times, dispatching on the alignment support scheme the target
   reports for DR.  Costs are recorded into *INSIDE_COST / *PROLOGUE_COST and
   the corresponding cost vectors.  */
1129 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1130 bool add_realign_cost
, unsigned int *inside_cost
,
1131 unsigned int *prologue_cost
,
1132 stmt_vector_for_cost
*prologue_cost_vec
,
1133 stmt_vector_for_cost
*body_cost_vec
,
1134 bool record_prologue_costs
)
/* Ask the target how (and whether) this DR's alignment is supported.  */
1136 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1137 gimple
*stmt
= DR_STMT (dr
);
1138 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1140 switch (alignment_support_scheme
)
/* (presumably the dr_aligned case — the case label line is missing from this
   extraction)  Aligned load: one vector_load per copy.  */
1144 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1145 stmt_info
, 0, vect_body
);
1147 if (dump_enabled_p ())
1148 dump_printf_loc (MSG_NOTE
, vect_location
,
1149 "vect_model_load_cost: aligned.\n");
/* Misaligned load directly supported by hardware: charge an unaligned_load,
   parameterized by the known misalignment.  */
1153 case dr_unaligned_supported
:
1155 /* Here, we assign an additional cost for the unaligned load. */
1156 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1157 unaligned_load
, stmt_info
,
1158 DR_MISALIGNMENT (dr
), vect_body
);
1160 if (dump_enabled_p ())
1161 dump_printf_loc (MSG_NOTE
, vect_location
,
1162 "vect_model_load_cost: unaligned supported by "
/* Explicit realignment: two vector loads plus a permute per copy.  */
1167 case dr_explicit_realign
:
1169 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1170 vector_load
, stmt_info
, 0, vect_body
);
1171 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1172 vec_perm
, stmt_info
, 0, vect_body
);
1174 /* FIXME: If the misalignment remains fixed across the iterations of
1175 the containing loop, the following cost should be added to the
/* If the target has a mask-for-load builtin, computing the mask costs one
   extra vector_stmt.  */
1177 if (targetm
.vectorize
.builtin_mask_for_load
)
1178 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1179 stmt_info
, 0, vect_body
);
1181 if (dump_enabled_p ())
1182 dump_printf_loc (MSG_NOTE
, vect_location
,
1183 "vect_model_load_cost: explicit realign\n");
/* Optimized realignment (software pipelined): hoist address/mask setup into
   the loop prologue, keep one load + one permute inside the loop.  */
1187 case dr_explicit_realign_optimized
:
1189 if (dump_enabled_p ())
1190 dump_printf_loc (MSG_NOTE
, vect_location
,
1191 "vect_model_load_cost: unaligned software "
1194 /* Unaligned software pipeline has a load of an address, an initial
1195 load, and possibly a mask operation to "prime" the loop. However,
1196 if this is an access in a group of loads, which provide grouped
1197 access, then the above cost should only be considered for one
1198 access in the group. Inside the loop, there is a load op
1199 and a realignment op. */
/* Prologue costs are only recorded once per group (ADD_REALIGN_COST) and
   only when the caller asked for them (RECORD_PROLOGUE_COSTS).  */
1201 if (add_realign_cost
&& record_prologue_costs
)
1203 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1204 vector_stmt
, stmt_info
,
1206 if (targetm
.vectorize
.builtin_mask_for_load
)
1207 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1208 vector_stmt
, stmt_info
,
/* Per-copy steady-state cost: one load and one permute in the loop body.  */
1212 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1213 stmt_info
, 0, vect_body
);
1214 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1215 stmt_info
, 0, vect_body
);
1217 if (dump_enabled_p ())
1218 dump_printf_loc (MSG_NOTE
, vect_location
,
1219 "vect_model_load_cost: explicit realign optimized"
/* Unsupported misalignment: poison the cost so this path is never chosen.  */
1225 case dr_unaligned_unsupported
:
1227 *inside_cost
= VECT_MAX_COST
;
1229 if (dump_enabled_p ())
1230 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1231 "vect_model_load_cost: unsupported access.\n");
/* NOTE(review): damaged extraction — lines split and some dropped (the
   `if (gsi)' guard, `else' arms and closing braces are missing); code kept
   byte-identical, comments only added.  */
1240 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1241 the loop preheader for the vectorized stmt STMT. */
1244 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
/* When a GSI is supplied (guard missing in this extraction), insert directly
   at that iterator via the common helper.  */
1247 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Otherwise locate where the init stmt belongs: the loop preheader for loop
   vectorization, or after the labels of the BB for basic-block SLP.  */
1250 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1251 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1255 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* For a nested stmt the relevant preheader is the outer loop's (the code
   selecting loop->inner vs loop appears dropped by the extraction).  */
1259 if (nested_in_vect_loop_p (loop
, stmt
))
/* Insert on the preheader edge; this must not split the edge (no new BB).  */
1262 pe
= loop_preheader_edge (loop
);
1263 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1264 gcc_assert (!new_bb
);
/* Basic-block vectorization: insert after any labels at the BB start.  */
1268 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1270 gimple_stmt_iterator gsi_bb_start
;
1272 gcc_assert (bb_vinfo
);
1273 bb
= BB_VINFO_BB (bb_vinfo
);
1274 gsi_bb_start
= gsi_after_labels (bb
);
1275 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
/* Trace the created init stmt when dumping is enabled.  */
1279 if (dump_enabled_p ())
1281 dump_printf_loc (MSG_NOTE
, vect_location
,
1282 "created new init_stmt: ");
1283 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
/* NOTE(review): damaged extraction — declarations of new_temp, init_stmt,
   new_var, vec_oprnd and several control-flow lines are missing; code kept
   byte-identical, comments only added.  */
1287 /* Function vect_init_vector.
1289 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1290 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1291 vector type a vector with all elements equal to VAL is created first.
1292 Place the initialization at BSI if it is not NULL. Otherwise, place the
1293 initialization at the loop preheader.
1294 Return the DEF of INIT_STMT.
1295 It will be used in the vectorization of STMT. */
1298 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
/* Scalar VAL being broadcast into vector TYPE: first coerce VAL to the
   vector's element type if needed.  */
1305 if (TREE_CODE (type
) == VECTOR_TYPE
1306 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1308 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
/* Constants can be converted by folding a VIEW_CONVERT_EXPR directly.  */
1310 if (CONSTANT_CLASS_P (val
))
1311 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
/* Non-constants need an explicit conversion stmt inserted first.  */
1314 new_temp
= make_ssa_name (TREE_TYPE (type
));
1315 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1316 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Build the {val,val,...,val} vector constructor.  */
1320 val
= build_vector_from_val (type
, val
);
/* Materialize the (possibly vector) value into a fresh SSA name and insert
   the init stmt at GSI or in the preheader; return its LHS.  */
1323 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1324 init_stmt
= gimple_build_assign (new_var
, val
);
1325 new_temp
= make_ssa_name (new_var
, init_stmt
);
1326 gimple_assign_set_lhs (init_stmt
, new_temp
);
1327 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1328 vec_oprnd
= gimple_assign_lhs (init_stmt
);
/* NOTE(review): damaged extraction — the `switch (dt)' header, several
   braces, breaks and the final `default: gcc_unreachable ()' are missing;
   code kept byte-identical, comments only added.  */
1333 /* Function vect_get_vec_def_for_operand.
1335 OP is an operand in STMT. This function returns a (vector) def that will be
1336 used in the vectorized stmt for STMT.
1338 In the case that OP is an SSA_NAME which is defined in the loop, then
1339 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1341 In case OP is an invariant or constant, a new stmt that creates a vector def
1342 needs to be introduced. */
1345 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree
*scalar_def
)
1350 stmt_vec_info def_stmt_info
= NULL
;
1351 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1352 unsigned int nunits
;
1353 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1355 enum vect_def_type dt
;
1359 if (dump_enabled_p ())
1361 dump_printf_loc (MSG_NOTE
, vect_location
,
1362 "vect_get_vec_def_for_operand: ");
1363 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1364 dump_printf (MSG_NOTE
, "\n");
/* Classify OP: this fills DEF_STMT, DEF and the def-type DT which drives
   the case analysis below.  Must succeed at this point.  */
1367 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
,
1368 &def_stmt
, &def
, &dt
);
1369 gcc_assert (is_simple_use
);
1370 if (dump_enabled_p ())
1372 int loc_printed
= 0;
1375 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1377 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1378 dump_printf (MSG_NOTE
, "\n");
1383 dump_printf (MSG_NOTE
, " def_stmt = ");
1385 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1386 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
/* (switch (dt) header missing from this extraction)  */
1392 /* Case 1: operand is a constant. */
1393 case vect_constant_def
:
/* Broadcast the constant into a vector in the preheader.  */
1395 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1396 gcc_assert (vector_type
);
1397 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1402 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1403 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE
, vect_location
,
1405 "Create vector_cst. nunits = %d\n", nunits
);
1407 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1410 /* Case 2: operand is defined outside the loop - loop invariant. */
1411 case vect_external_def
:
/* Same broadcast, but based on the invariant DEF's type.  */
1413 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1414 gcc_assert (vector_type
);
1419 /* Create 'vec_inv = {inv,inv,..,inv}' */
1420 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1423 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1426 /* Case 3: operand is defined inside the loop. */
1427 case vect_internal_def
:
1430 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1432 /* Get the def from the vectorized stmt. */
1433 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1435 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1436 /* Get vectorized pattern statement. */
/* If DEF_STMT was replaced by a pattern stmt and is itself irrelevant, the
   vectorized def lives on the related (pattern) stmt instead.  */
1438 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1439 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1440 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1441 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1442 gcc_assert (vec_stmt
);
/* Pick the def off the vectorized stmt: PHI result, call LHS, or
   assignment LHS, depending on its gimple code.  */
1443 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1444 vec_oprnd
= PHI_RESULT (vec_stmt
);
1445 else if (is_gimple_call (vec_stmt
))
1446 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1448 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1452 /* Case 4: operand is defined by a loop header phi - reduction */
1453 case vect_reduction_def
:
1454 case vect_double_reduction_def
:
1455 case vect_nested_cycle
:
1459 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1460 loop
= (gimple_bb (def_stmt
))->loop_father
;
1462 /* Get the def before the loop */
1463 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1464 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1467 /* Case 5: operand is defined by loop-header phi - induction. */
1468 case vect_induction_def
:
1470 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1472 /* Get the def from the vectorized stmt. */
1473 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1474 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1475 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1476 vec_oprnd
= PHI_RESULT (vec_stmt
);
1478 vec_oprnd
= gimple_get_lhs (vec_stmt
);
/* NOTE(review): damaged extraction — a `return vec_oprnd;' for the
   external/constant case and closing braces are missing; code kept
   byte-identical, comments only added.  */
1488 /* Function vect_get_vec_def_for_stmt_copy
1490 Return a vector-def for an operand. This function is used when the
1491 vectorized stmt to be created (by the caller to this function) is a "copy"
1492 created in case the vectorized result cannot fit in one vector, and several
1493 copies of the vector-stmt are required. In this case the vector-def is
1494 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1495 of the stmt that defines VEC_OPRND.
1496 DT is the type of the vector def VEC_OPRND.
1499 In case the vectorization factor (VF) is bigger than the number
1500 of elements that can fit in a vectype (nunits), we have to generate
1501 more than one vector stmt to vectorize the scalar stmt. This situation
1502 arises when there are multiple data-types operated upon in the loop; the
1503 smallest data-type determines the VF, and as a result, when vectorizing
1504 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1505 vector stmt (each computing a vector of 'nunits' results, and together
1506 computing 'VF' results in each iteration). This function is called when
1507 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1508 which VF=16 and nunits=4, so the number of copies required is 4):
1510 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1512 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1513 VS1.1: vx.1 = memref1 VS1.2
1514 VS1.2: vx.2 = memref2 VS1.3
1515 VS1.3: vx.3 = memref3
1517 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1518 VSnew.1: vz1 = vx.1 + ... VSnew.2
1519 VSnew.2: vz2 = vx.2 + ... VSnew.3
1520 VSnew.3: vz3 = vx.3 + ...
1522 The vectorization of S1 is explained in vectorizable_load.
1523 The vectorization of S2:
1524 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1525 the function 'vect_get_vec_def_for_operand' is called to
1526 get the relevant vector-def for each operand of S2. For operand x it
1527 returns the vector-def 'vx.0'.
1529 To create the remaining copies of the vector-stmt (VSnew.j), this
1530 function is called to get the relevant vector-def for each operand. It is
1531 obtained from the respective VS1.j stmt, which is recorded in the
1532 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1534 For example, to obtain the vector-def 'vx.1' in order to create the
1535 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1536 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1537 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1538 and return its def ('vx.1').
1539 Overall, to create the above sequence this function will be called 3 times:
1540 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1541 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1542 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1545 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1547 gimple
*vec_stmt_for_operand
;
1548 stmt_vec_info def_stmt_info
;
1550 /* Do nothing; can reuse same def. */
/* Invariant/constant defs are shared by all copies — return VEC_OPRND
   unchanged (the return line itself is missing from this extraction).  */
1551 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Walk from the stmt defining VEC_OPRND to its RELATED_STMT (the next copy)
   and return that copy's def.  */
1554 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1555 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1556 gcc_assert (def_stmt_info
);
1557 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1558 gcc_assert (vec_stmt_for_operand
);
1559 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1560 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1561 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1563 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
/* NOTE(review): damaged extraction — lines split, braces missing; code kept
   byte-identical, comments only added.  */
1568 /* Get vectorized definitions for the operands to create a copy of an original
1569 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1572 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1573 vec
<tree
> *vec_oprnds0
,
1574 vec
<tree
> *vec_oprnds1
)
/* Replace the top of VEC_OPRNDS0 in place with the next-copy def.  */
1576 tree vec_oprnd
= vec_oprnds0
->pop ();
1578 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1579 vec_oprnds0
->quick_push (vec_oprnd
);
/* The second operand vector is optional; update it the same way only when
   present and non-empty.  */
1581 if (vec_oprnds1
&& vec_oprnds1
->length ())
1583 vec_oprnd
= vec_oprnds1
->pop ();
1584 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1585 vec_oprnds1
->quick_push (vec_oprnd
);
/* NOTE(review): damaged extraction — the `if (slp_node)' / `else' structure
   and several guards are missing; code kept byte-identical, comments only
   added.  */
1590 /* Get vectorized definitions for OP0 and OP1.
1591 REDUC_INDEX is the index of reduction operand in case of reduction,
1592 and -1 otherwise. */
1595 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1596 vec
<tree
> *vec_oprnds0
,
1597 vec
<tree
> *vec_oprnds1
,
1598 slp_tree slp_node
, int reduc_index
)
/* SLP path (guard missing from this extraction): collect the operands and
   let vect_get_slp_defs produce defs for the whole SLP node at once.  */
1602 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1603 auto_vec
<tree
> ops (nops
);
1604 auto_vec
<vec
<tree
> > vec_defs (nops
);
1606 ops
.quick_push (op0
);
1608 ops
.quick_push (op1
);
1610 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
/* Hand the per-operand def vectors back to the caller.  */
1612 *vec_oprnds0
= vec_defs
[0];
1614 *vec_oprnds1
= vec_defs
[1];
/* Non-SLP path: one def per operand via vect_get_vec_def_for_operand.  */
1620 vec_oprnds0
->create (1);
1621 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1622 vec_oprnds0
->quick_push (vec_oprnd
);
/* OP1 is handled only when present (guard missing from this extraction).  */
1626 vec_oprnds1
->create (1);
1627 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1628 vec_oprnds1
->quick_push (vec_oprnd
);
/* NOTE(review): damaged extraction — lines split, some braces missing; code
   kept byte-identical, comments only added.  */
1634 /* Function vect_finish_stmt_generation.
1636 Insert a new stmt. */
/* Insert VEC_STMT before *GSI, register a stmt_vec_info for it, propagate
   location and EH region from the scalar STMT, and keep virtual SSA form
   up to date when inserting a store before a VUSE.  */
1639 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1640 gimple_stmt_iterator
*gsi
)
1642 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1643 vec_info
*vinfo
= stmt_info
->vinfo
;
1645 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
/* If inserting a memory stmt before an existing stmt with a virtual use,
   wire VEC_STMT into the virtual use-def chain by hand.  */
1647 if (!gsi_end_p (*gsi
)
1648 && gimple_has_mem_ops (vec_stmt
))
1650 gimple
*at_stmt
= gsi_stmt (*gsi
);
1651 tree vuse
= gimple_vuse (at_stmt
);
1652 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1654 tree vdef
= gimple_vdef (at_stmt
);
1655 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1656 /* If we have an SSA vuse and insert a store, update virtual
1657 SSA form to avoid triggering the renamer. Do so only
1658 if we can easily see all uses - which is what almost always
1659 happens with the way vectorized stmts are inserted. */
1660 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1661 && ((is_gimple_assign (vec_stmt
)
1662 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1663 || (is_gimple_call (vec_stmt
)
1664 && !(gimple_call_flags (vec_stmt
)
1665 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* Create a fresh VDEF for the new store and rewire AT_STMT's VUSE to it.  */
1667 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1668 gimple_set_vdef (vec_stmt
, new_vdef
);
1669 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1673 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Every vectorized stmt gets its own stmt_vec_info.  */
1675 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1677 if (dump_enabled_p ())
1679 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1680 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
/* Keep debug locations pointing at the original scalar stmt.  */
1683 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1685 /* While EH edges will generally prevent vectorization, stmt might
1686 e.g. be in a must-not-throw region. Ensure newly created stmts
1687 that could throw are part of the same region. */
1688 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1689 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1690 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
/* NOTE(review): damaged extraction — the `if (fndecl == NULL_TREE ...' head
   of the final guard and returns are partially missing; code kept
   byte-identical, comments only added.  */
1693 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1694 a function declaration if the target has a vectorized version
1695 of the function, or NULL_TREE if the function cannot be vectorized. */
1698 vectorizable_function (gcall
*call
, tree vectype_out
, tree vectype_in
)
1700 tree fndecl
= gimple_call_fndecl (call
);
1702 /* We only handle functions that do not read or clobber memory -- i.e.
1703 const or novops ones. */
1704 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
/* Only known built-in FUNCTION_DECLs can have target vector versions.  */
1708 || TREE_CODE (fndecl
) != FUNCTION_DECL
1709 || !DECL_BUILT_IN (fndecl
))
/* Delegate the final decision to the target hook.  */
1712 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
/* Forward declaration: permute_vec_elements is defined later in the file
   and used by vectorizable_mask_load_store below.  */
1717 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1718 gimple_stmt_iterator
*);
/* NOTE(review): damaged extraction — many lines (braces, else/return arms,
   declarations, `break's) are missing throughout this large function; code
   is preserved byte-identically, only comments were added.  */
1721 /* Function vectorizable_mask_load_store.
1723 Check if STMT performs a conditional load or store that can be vectorized.
1724 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1725 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1726 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1729 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1730 gimple
**vec_stmt
, slp_tree slp_node
)
1732 tree vec_dest
= NULL
;
1733 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1734 stmt_vec_info prev_stmt_info
;
1735 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1736 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1737 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1738 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1739 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1743 tree dataref_ptr
= NULL_TREE
;
1745 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1749 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1750 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1751 int gather_scale
= 1;
1752 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1757 enum vect_def_type dt
;
/* FORNOW: masked load/store is not supported under SLP.  */
1759 if (slp_node
!= NULL
)
/* Number of vector stmt copies needed to cover the vectorization factor.  */
1762 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1763 gcc_assert (ncopies
>= 1);
/* Distinguish IFN_MASK_STORE from IFN_MASK_LOAD; the mask is call arg 2.  */
1765 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1766 mask
= gimple_call_arg (stmt
, 2);
/* The mask element precision must match the vector element size.  */
1767 if (TYPE_PRECISION (TREE_TYPE (mask
))
1768 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1771 /* FORNOW. This restriction should be relaxed. */
1772 if (nested_in_vect_loop
&& ncopies
> 1)
1774 if (dump_enabled_p ())
1775 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1776 "multiple types in nested loop.");
/* Applicability checks: must be relevant, an internal def, and have a DR.  */
1780 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1783 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1786 if (!STMT_VINFO_DATA_REF (stmt_info
))
1789 elem_type
= TREE_TYPE (vectype
);
1791 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1794 if (STMT_VINFO_STRIDED_P (stmt_info
))
/* Gather/scatter path: validate the gather decl and its offset operand.  */
1797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1801 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
1802 &gather_off
, &gather_scale
);
1803 gcc_assert (gather_decl
);
1804 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
,
1805 &def_stmt
, &def
, &gather_dt
,
1806 &gather_off_vectype
))
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1810 "gather index use not simple.");
/* The builtin's 4th argument type tells us the mask representation; an
   integer mask form is not handled here.  */
1814 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1816 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1817 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1821 "masked gather with integer mask not supported.");
/* Non-gather: reject non-positive step and targets without a masked
   load/store instruction for this mode.  */
1825 else if (tree_int_cst_compare (nested_in_vect_loop
1826 ? STMT_VINFO_DR_STEP (stmt_info
)
1827 : DR_STEP (dr
), size_zero_node
) <= 0)
1829 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1830 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
/* The mask itself must be a simple SSA_NAME use.  */
1833 if (TREE_CODE (mask
) != SSA_NAME
)
1836 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
,
1837 &def_stmt
, &def
, &dt
))
/* For stores, the value being stored (call arg 3) must be simple too.  */
1842 tree rhs
= gimple_call_arg (stmt
, 3);
1843 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
,
1844 &def_stmt
, &def
, &dt
))
/* Analysis-only phase: record the stmt type and its cost, then return.  */
1848 if (!vec_stmt
) /* transformation not required. */
1850 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1852 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1855 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
/* ----- Transformation phase. -----
   Masked gather: emit calls to the target gather builtin, converting
   offset/mask/result between the builtin's types and ours as needed.  */
1861 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1863 tree vec_oprnd0
= NULL_TREE
, op
;
1864 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1865 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1866 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1867 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1868 tree mask_perm_mask
= NULL_TREE
;
1869 edge pe
= loop_preheader_edge (loop
);
1872 enum { NARROW
, NONE
, WIDEN
} modifier
;
1873 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
/* Decompose the gather builtin signature:
   ret = gather (src, ptr, idx, mask, scale).  */
1875 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1876 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1877 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1878 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1879 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1880 scaletype
= TREE_VALUE (arglist
);
1881 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1882 && types_compatible_p (srctype
, masktype
));
/* Relate the data vector width to the offset vector width; build permute
   masks to widen/narrow between them when they differ by a factor of 2.  */
1884 if (nunits
== gather_off_nunits
)
1886 else if (nunits
== gather_off_nunits
/ 2)
1888 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1891 for (i
= 0; i
< gather_off_nunits
; ++i
)
1892 sel
[i
] = i
| nunits
;
1894 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1896 else if (nunits
== gather_off_nunits
* 2)
1898 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1901 for (i
= 0; i
< nunits
; ++i
)
1902 sel
[i
] = i
< gather_off_nunits
1903 ? i
: i
+ nunits
- gather_off_nunits
;
1905 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1907 for (i
= 0; i
< nunits
; ++i
)
1908 sel
[i
] = i
| gather_off_nunits
;
1909 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1914 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
/* Materialize the invariant base pointer in the loop preheader.  */
1916 ptr
= fold_convert (ptrtype
, gather_base
);
1917 if (!is_gimple_min_invariant (ptr
))
1919 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1920 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1921 gcc_assert (!new_bb
);
1924 scale
= build_int_cst (scaletype
, gather_scale
);
/* Main copy loop: one gather call per copy of the vector stmt.  */
1926 prev_stmt_info
= NULL
;
1927 for (j
= 0; j
< ncopies
; ++j
)
/* Obtain this copy's offset operand: permuted halves on odd WIDEN copies,
   first def on copy 0, next-copy def otherwise.  */
1929 if (modifier
== WIDEN
&& (j
& 1))
1930 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1931 perm_mask
, stmt
, gsi
);
1934 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1937 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
/* View-convert the offset to the builtin's index type when needed.  */
1939 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1941 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1942 == TYPE_VECTOR_SUBPARTS (idxtype
));
1943 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1944 var
= make_ssa_name (var
);
1945 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1947 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1948 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Obtain this copy's mask operand the same way.  */
1952 if (mask_perm_mask
&& (j
& 1))
1953 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1954 mask_perm_mask
, stmt
, gsi
);
1958 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
1961 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
,
1962 &def_stmt
, &def
, &dt
);
1963 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
/* View-convert the mask to the builtin's mask type when needed.  */
1967 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1969 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1970 == TYPE_VECTOR_SUBPARTS (masktype
));
1971 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
1973 var
= make_ssa_name (var
);
1974 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1976 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
1977 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Emit the gather call; the mask doubles as the pass-through source.  */
1983 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
/* View-convert the builtin's return value back to our vectype.  */
1986 if (!useless_type_conversion_p (vectype
, rettype
))
1988 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1989 == TYPE_VECTOR_SUBPARTS (rettype
));
1990 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
1991 op
= make_ssa_name (var
, new_stmt
);
1992 gimple_call_set_lhs (new_stmt
, op
);
1993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1994 var
= make_ssa_name (vec_dest
);
1995 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
1996 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2000 var
= make_ssa_name (vec_dest
, new_stmt
);
2001 gimple_call_set_lhs (new_stmt
, var
);
2004 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* In the NARROW case, pairs of gather results are merged by a permute.  */
2006 if (modifier
== NARROW
)
2013 var
= permute_vec_elements (prev_res
, var
,
2014 perm_mask
, stmt
, gsi
);
2015 new_stmt
= SSA_NAME_DEF_STMT (var
);
/* Chain the copies via STMT_VINFO_RELATED_STMT.  */
2018 if (prev_stmt_info
== NULL
)
2019 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2021 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2022 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2025 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
/* Replace the scalar MASK_LOAD with a dead zero-assignment to its LHS.  */
2027 tree lhs
= gimple_call_lhs (stmt
);
2028 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2029 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2030 set_vinfo_for_stmt (stmt
, NULL
);
2031 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2032 gsi_replace (gsi
, new_stmt
, true);
/* Masked store path: emit IFN_MASK_STORE calls, one per copy.  */
2037 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2038 prev_stmt_info
= NULL
;
2039 for (i
= 0; i
< ncopies
; i
++)
2041 unsigned align
, misalign
;
/* Copy 0: get the initial vector defs and create the data-ref pointer.  */
2045 tree rhs
= gimple_call_arg (stmt
, 3);
2046 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2047 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2048 /* We should have catched mismatched types earlier. */
2049 gcc_assert (useless_type_conversion_p (vectype
,
2050 TREE_TYPE (vec_rhs
)));
2051 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2052 NULL_TREE
, &dummy
, gsi
,
2053 &ptr_incr
, false, &inv_p
);
2054 gcc_assert (!inv_p
);
/* Later copies: advance to the next-copy defs and bump the pointer.  */
2058 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, &def_stmt
,
2060 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2061 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, &def_stmt
,
2063 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2064 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2065 TYPE_SIZE_UNIT (vectype
));
/* Annotate the pointer with the best alignment information we have.  */
2068 align
= TYPE_ALIGN_UNIT (vectype
);
2069 if (aligned_access_p (dr
))
2071 else if (DR_MISALIGNMENT (dr
) == -1)
2073 align
= TYPE_ALIGN_UNIT (elem_type
);
2077 misalign
= DR_MISALIGNMENT (dr
);
2078 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
/* Emit the masked store as an internal call.  */
2081 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2082 gimple_call_arg (stmt
, 1),
2084 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2086 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2088 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2089 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Masked load path: emit IFN_MASK_LOAD calls, one per copy.  */
2094 tree vec_mask
= NULL_TREE
;
2095 prev_stmt_info
= NULL
;
2096 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2097 for (i
= 0; i
< ncopies
; i
++)
2099 unsigned align
, misalign
;
/* Copy 0 vs later copies, as in the store path above.  */
2103 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2104 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2105 NULL_TREE
, &dummy
, gsi
,
2106 &ptr_incr
, false, &inv_p
);
2107 gcc_assert (!inv_p
);
2111 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, &def_stmt
,
2113 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2114 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2115 TYPE_SIZE_UNIT (vectype
));
2118 align
= TYPE_ALIGN_UNIT (vectype
);
2119 if (aligned_access_p (dr
))
2121 else if (DR_MISALIGNMENT (dr
) == -1)
2123 align
= TYPE_ALIGN_UNIT (elem_type
);
2127 misalign
= DR_MISALIGNMENT (dr
);
2128 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2131 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2132 gimple_call_arg (stmt
, 1),
2134 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2135 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2137 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2139 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2140 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2146 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
/* As in the gather path: replace the scalar MASK_LOAD by a dead zeroing of
   its LHS so later passes can delete it.  */
2148 tree lhs
= gimple_call_lhs (stmt
);
2149 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2150 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2151 set_vinfo_for_stmt (stmt
, NULL
);
2152 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2153 gsi_replace (gsi
, new_stmt
, true);
2160 /* Function vectorizable_call.
2162 Check if GS performs a function call that can be vectorized.
2163 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2164 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2165 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2168 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2175 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2176 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2177 tree vectype_out
, vectype_in
;
2180 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2181 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2182 vec_info
*vinfo
= stmt_info
->vinfo
;
2183 tree fndecl
, new_temp
, def
, rhs_type
;
2185 enum vect_def_type dt
[3]
2186 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2187 gimple
*new_stmt
= NULL
;
2189 vec
<tree
> vargs
= vNULL
;
2190 enum { NARROW
, NONE
, WIDEN
} modifier
;
2194 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2197 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2200 /* Is GS a vectorizable call? */
2201 stmt
= dyn_cast
<gcall
*> (gs
);
2205 if (gimple_call_internal_p (stmt
)
2206 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2207 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2208 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2211 if (gimple_call_lhs (stmt
) == NULL_TREE
2212 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2215 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2217 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2219 /* Process function arguments. */
2220 rhs_type
= NULL_TREE
;
2221 vectype_in
= NULL_TREE
;
2222 nargs
= gimple_call_num_args (stmt
);
2224 /* Bail out if the function has more than three arguments, we do not have
2225 interesting builtin functions to vectorize with more than two arguments
2226 except for fma. No arguments is also not good. */
2227 if (nargs
== 0 || nargs
> 3)
2230 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2231 if (gimple_call_internal_p (stmt
)
2232 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2235 rhs_type
= unsigned_type_node
;
2238 for (i
= 0; i
< nargs
; i
++)
2242 op
= gimple_call_arg (stmt
, i
);
2244 /* We can only handle calls with arguments of the same type. */
2246 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2248 if (dump_enabled_p ())
2249 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2250 "argument types differ.\n");
2254 rhs_type
= TREE_TYPE (op
);
2256 if (!vect_is_simple_use_1 (op
, stmt
, vinfo
,
2257 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2259 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2261 "use not simple.\n");
2266 vectype_in
= opvectype
;
2268 && opvectype
!= vectype_in
)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2272 "argument vector types differ.\n");
2276 /* If all arguments are external or constant defs use a vector type with
2277 the same size as the output vector type. */
2279 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2281 gcc_assert (vectype_in
);
2284 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2287 "no vectype for scalar type ");
2288 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2289 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2296 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2297 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2298 if (nunits_in
== nunits_out
/ 2)
2300 else if (nunits_out
== nunits_in
)
2302 else if (nunits_out
== nunits_in
/ 2)
2307 /* For now, we only vectorize functions if a target specific builtin
2308 is available. TODO -- in some cases, it might be profitable to
2309 insert the calls for pieces of the vector, in order to be able
2310 to vectorize other operations in the loop. */
2311 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2312 if (fndecl
== NULL_TREE
)
2314 if (gimple_call_internal_p (stmt
)
2315 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2318 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2319 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2320 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2321 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2323 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2324 { 0, 1, 2, ... vf - 1 } vector. */
2325 gcc_assert (nargs
== 0);
2329 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2331 "function is not vectorizable.\n");
2336 gcc_assert (!gimple_vuse (stmt
));
2338 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2340 else if (modifier
== NARROW
)
2341 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2343 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2345 /* Sanity check: make sure that at least one copy of the vectorized stmt
2346 needs to be generated. */
2347 gcc_assert (ncopies
>= 1);
2349 if (!vec_stmt
) /* transformation not required. */
2351 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2352 if (dump_enabled_p ())
2353 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2355 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2365 scalar_dest
= gimple_call_lhs (stmt
);
2366 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2368 prev_stmt_info
= NULL
;
2372 for (j
= 0; j
< ncopies
; ++j
)
2374 /* Build argument list for the vectorized call. */
2376 vargs
.create (nargs
);
2382 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2383 vec
<tree
> vec_oprnds0
;
2385 for (i
= 0; i
< nargs
; i
++)
2386 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2387 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2388 vec_oprnds0
= vec_defs
[0];
2390 /* Arguments are ready. Create the new vector stmt. */
2391 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2394 for (k
= 0; k
< nargs
; k
++)
2396 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2397 vargs
[k
] = vec_oprndsk
[i
];
2399 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2400 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2401 gimple_call_set_lhs (new_stmt
, new_temp
);
2402 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2403 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2406 for (i
= 0; i
< nargs
; i
++)
2408 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2409 vec_oprndsi
.release ();
2414 for (i
= 0; i
< nargs
; i
++)
2416 op
= gimple_call_arg (stmt
, i
);
2419 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2422 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2424 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2427 vargs
.quick_push (vec_oprnd0
);
2430 if (gimple_call_internal_p (stmt
)
2431 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2433 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2435 for (k
= 0; k
< nunits_out
; ++k
)
2436 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2437 tree cst
= build_vector (vectype_out
, v
);
2439 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2440 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2441 new_temp
= make_ssa_name (new_var
, init_stmt
);
2442 gimple_assign_set_lhs (init_stmt
, new_temp
);
2443 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2444 new_temp
= make_ssa_name (vec_dest
);
2445 new_stmt
= gimple_build_assign (new_temp
,
2446 gimple_assign_lhs (init_stmt
));
2450 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2451 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2452 gimple_call_set_lhs (new_stmt
, new_temp
);
2454 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2457 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2459 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2461 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2467 for (j
= 0; j
< ncopies
; ++j
)
2469 /* Build argument list for the vectorized call. */
2471 vargs
.create (nargs
* 2);
2477 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2478 vec
<tree
> vec_oprnds0
;
2480 for (i
= 0; i
< nargs
; i
++)
2481 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2482 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2483 vec_oprnds0
= vec_defs
[0];
2485 /* Arguments are ready. Create the new vector stmt. */
2486 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2490 for (k
= 0; k
< nargs
; k
++)
2492 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2493 vargs
.quick_push (vec_oprndsk
[i
]);
2494 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2496 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2497 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2498 gimple_call_set_lhs (new_stmt
, new_temp
);
2499 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2500 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2503 for (i
= 0; i
< nargs
; i
++)
2505 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2506 vec_oprndsi
.release ();
2511 for (i
= 0; i
< nargs
; i
++)
2513 op
= gimple_call_arg (stmt
, i
);
2517 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2519 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2523 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2525 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2527 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2530 vargs
.quick_push (vec_oprnd0
);
2531 vargs
.quick_push (vec_oprnd1
);
2534 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2535 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2536 gimple_call_set_lhs (new_stmt
, new_temp
);
2537 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2540 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2542 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2544 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2547 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2552 /* No current target implements this case. */
2558 /* The call in STMT might prevent it from being removed in dce.
2559 We however cannot remove it here, due to the way the ssa name
2560 it defines is mapped to the new definition. So just replace
2561 rhs of the statement with something harmless. */
2566 type
= TREE_TYPE (scalar_dest
);
2567 if (is_pattern_stmt_p (stmt_info
))
2568 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2570 lhs
= gimple_call_lhs (stmt
);
2572 if (gimple_call_internal_p (stmt
)
2573 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2575 /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
2576 with vf - 1 rather than 0, that is the last iteration of the
2578 imm_use_iterator iter
;
2579 use_operand_p use_p
;
2581 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2583 basic_block use_bb
= gimple_bb (use_stmt
);
2585 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), use_bb
))
2587 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2588 SET_USE (use_p
, build_int_cst (TREE_TYPE (lhs
),
2589 ncopies
* nunits_out
- 1));
2590 update_stmt (use_stmt
);
2595 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2596 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2597 set_vinfo_for_stmt (stmt
, NULL
);
2598 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2599 gsi_replace (gsi
, new_stmt
, false);
2605 struct simd_call_arg_info
2609 enum vect_def_type dt
;
2610 HOST_WIDE_INT linear_step
;
2612 bool simd_lane_linear
;
2615 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2616 is linear within simd lane (but not within whole loop), note it in
2620 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2621 struct simd_call_arg_info
*arginfo
)
2623 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2625 if (!is_gimple_assign (def_stmt
)
2626 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2627 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2630 tree base
= gimple_assign_rhs1 (def_stmt
);
2631 HOST_WIDE_INT linear_step
= 0;
2632 tree v
= gimple_assign_rhs2 (def_stmt
);
2633 while (TREE_CODE (v
) == SSA_NAME
)
2636 def_stmt
= SSA_NAME_DEF_STMT (v
);
2637 if (is_gimple_assign (def_stmt
))
2638 switch (gimple_assign_rhs_code (def_stmt
))
2641 t
= gimple_assign_rhs2 (def_stmt
);
2642 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2644 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2645 v
= gimple_assign_rhs1 (def_stmt
);
2648 t
= gimple_assign_rhs2 (def_stmt
);
2649 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2651 linear_step
= tree_to_shwi (t
);
2652 v
= gimple_assign_rhs1 (def_stmt
);
2655 t
= gimple_assign_rhs1 (def_stmt
);
2656 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2657 || (TYPE_PRECISION (TREE_TYPE (v
))
2658 < TYPE_PRECISION (TREE_TYPE (t
))))
2667 else if (is_gimple_call (def_stmt
)
2668 && gimple_call_internal_p (def_stmt
)
2669 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
2671 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
2672 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
2677 arginfo
->linear_step
= linear_step
;
2679 arginfo
->simd_lane_linear
= true;
2685 /* Function vectorizable_simd_clone_call.
2687 Check if STMT performs a function call that can be vectorized
2688 by calling a simd clone of the function.
2689 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2690 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2691 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2694 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2695 gimple
**vec_stmt
, slp_tree slp_node
)
2700 tree vec_oprnd0
= NULL_TREE
;
2701 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2703 unsigned int nunits
;
2704 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2705 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2706 vec_info
*vinfo
= stmt_info
->vinfo
;
2707 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2708 tree fndecl
, new_temp
, def
;
2710 gimple
*new_stmt
= NULL
;
2712 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2713 vec
<tree
> vargs
= vNULL
;
2715 tree lhs
, rtype
, ratype
;
2716 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2718 /* Is STMT a vectorizable call? */
2719 if (!is_gimple_call (stmt
))
2722 fndecl
= gimple_call_fndecl (stmt
);
2723 if (fndecl
== NULL_TREE
)
2726 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2727 if (node
== NULL
|| node
->simd_clones
== NULL
)
2730 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2733 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2736 if (gimple_call_lhs (stmt
)
2737 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2740 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2742 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2744 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2748 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2751 /* Process function arguments. */
2752 nargs
= gimple_call_num_args (stmt
);
2754 /* Bail out if the function has zero arguments. */
2758 arginfo
.create (nargs
);
2760 for (i
= 0; i
< nargs
; i
++)
2762 simd_call_arg_info thisarginfo
;
2765 thisarginfo
.linear_step
= 0;
2766 thisarginfo
.align
= 0;
2767 thisarginfo
.op
= NULL_TREE
;
2768 thisarginfo
.simd_lane_linear
= false;
2770 op
= gimple_call_arg (stmt
, i
);
2771 if (!vect_is_simple_use_1 (op
, stmt
, vinfo
,
2772 &def_stmt
, &def
, &thisarginfo
.dt
,
2773 &thisarginfo
.vectype
)
2774 || thisarginfo
.dt
== vect_uninitialized_def
)
2776 if (dump_enabled_p ())
2777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2778 "use not simple.\n");
2783 if (thisarginfo
.dt
== vect_constant_def
2784 || thisarginfo
.dt
== vect_external_def
)
2785 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2787 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2789 /* For linear arguments, the analyze phase should have saved
2790 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2791 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2792 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
2794 gcc_assert (vec_stmt
);
2795 thisarginfo
.linear_step
2796 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
2798 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
2799 thisarginfo
.simd_lane_linear
2800 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
2801 == boolean_true_node
);
2802 /* If loop has been peeled for alignment, we need to adjust it. */
2803 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2804 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2805 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
2807 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2808 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
2809 tree opt
= TREE_TYPE (thisarginfo
.op
);
2810 bias
= fold_convert (TREE_TYPE (step
), bias
);
2811 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2813 = fold_build2 (POINTER_TYPE_P (opt
)
2814 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2815 thisarginfo
.op
, bias
);
2819 && thisarginfo
.dt
!= vect_constant_def
2820 && thisarginfo
.dt
!= vect_external_def
2822 && TREE_CODE (op
) == SSA_NAME
2823 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2825 && tree_fits_shwi_p (iv
.step
))
2827 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2828 thisarginfo
.op
= iv
.base
;
2830 else if ((thisarginfo
.dt
== vect_constant_def
2831 || thisarginfo
.dt
== vect_external_def
)
2832 && POINTER_TYPE_P (TREE_TYPE (op
)))
2833 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2834 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2836 if (POINTER_TYPE_P (TREE_TYPE (op
))
2837 && !thisarginfo
.linear_step
2839 && thisarginfo
.dt
!= vect_constant_def
2840 && thisarginfo
.dt
!= vect_external_def
2843 && TREE_CODE (op
) == SSA_NAME
)
2844 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
2846 arginfo
.quick_push (thisarginfo
);
2849 unsigned int badness
= 0;
2850 struct cgraph_node
*bestn
= NULL
;
2851 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2852 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2854 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2855 n
= n
->simdclone
->next_clone
)
2857 unsigned int this_badness
= 0;
2858 if (n
->simdclone
->simdlen
2859 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2860 || n
->simdclone
->nargs
!= nargs
)
2862 if (n
->simdclone
->simdlen
2863 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2864 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2865 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2866 if (n
->simdclone
->inbranch
)
2867 this_badness
+= 2048;
2868 int target_badness
= targetm
.simd_clone
.usable (n
);
2869 if (target_badness
< 0)
2871 this_badness
+= target_badness
* 512;
2872 /* FORNOW: Have to add code to add the mask argument. */
2873 if (n
->simdclone
->inbranch
)
2875 for (i
= 0; i
< nargs
; i
++)
2877 switch (n
->simdclone
->args
[i
].arg_type
)
2879 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2880 if (!useless_type_conversion_p
2881 (n
->simdclone
->args
[i
].orig_type
,
2882 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2884 else if (arginfo
[i
].dt
== vect_constant_def
2885 || arginfo
[i
].dt
== vect_external_def
2886 || arginfo
[i
].linear_step
)
2889 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2890 if (arginfo
[i
].dt
!= vect_constant_def
2891 && arginfo
[i
].dt
!= vect_external_def
)
2894 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2895 if (arginfo
[i
].dt
== vect_constant_def
2896 || arginfo
[i
].dt
== vect_external_def
2897 || (arginfo
[i
].linear_step
2898 != n
->simdclone
->args
[i
].linear_step
))
2901 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2905 case SIMD_CLONE_ARG_TYPE_MASK
:
2908 if (i
== (size_t) -1)
2910 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2915 if (arginfo
[i
].align
)
2916 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2917 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2919 if (i
== (size_t) -1)
2921 if (bestn
== NULL
|| this_badness
< badness
)
2924 badness
= this_badness
;
2934 for (i
= 0; i
< nargs
; i
++)
2935 if ((arginfo
[i
].dt
== vect_constant_def
2936 || arginfo
[i
].dt
== vect_external_def
)
2937 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2940 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2942 if (arginfo
[i
].vectype
== NULL
2943 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2944 > bestn
->simdclone
->simdlen
))
2951 fndecl
= bestn
->decl
;
2952 nunits
= bestn
->simdclone
->simdlen
;
2953 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2955 /* If the function isn't const, only allow it in simd loops where user
2956 has asserted that at least nunits consecutive iterations can be
2957 performed using SIMD instructions. */
2958 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2959 && gimple_vuse (stmt
))
2965 /* Sanity check: make sure that at least one copy of the vectorized stmt
2966 needs to be generated. */
2967 gcc_assert (ncopies
>= 1);
2969 if (!vec_stmt
) /* transformation not required. */
2971 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2972 for (i
= 0; i
< nargs
; i
++)
2973 if (bestn
->simdclone
->args
[i
].arg_type
2974 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2976 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
2978 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2979 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2980 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2981 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2982 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2983 tree sll
= arginfo
[i
].simd_lane_linear
2984 ? boolean_true_node
: boolean_false_node
;
2985 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
2987 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2988 if (dump_enabled_p ())
2989 dump_printf_loc (MSG_NOTE
, vect_location
,
2990 "=== vectorizable_simd_clone_call ===\n");
2991 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3002 scalar_dest
= gimple_call_lhs (stmt
);
3003 vec_dest
= NULL_TREE
;
3008 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3009 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3010 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3013 rtype
= TREE_TYPE (ratype
);
3017 prev_stmt_info
= NULL
;
3018 for (j
= 0; j
< ncopies
; ++j
)
3020 /* Build argument list for the vectorized call. */
3022 vargs
.create (nargs
);
3026 for (i
= 0; i
< nargs
; i
++)
3028 unsigned int k
, l
, m
, o
;
3030 op
= gimple_call_arg (stmt
, i
);
3031 switch (bestn
->simdclone
->args
[i
].arg_type
)
3033 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3034 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3035 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3036 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3038 if (TYPE_VECTOR_SUBPARTS (atype
)
3039 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3041 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3042 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3043 / TYPE_VECTOR_SUBPARTS (atype
));
3044 gcc_assert ((k
& (k
- 1)) == 0);
3047 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3050 vec_oprnd0
= arginfo
[i
].op
;
3051 if ((m
& (k
- 1)) == 0)
3053 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3056 arginfo
[i
].op
= vec_oprnd0
;
3058 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3060 bitsize_int ((m
& (k
- 1)) * prec
));
3062 = gimple_build_assign (make_ssa_name (atype
),
3064 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3065 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3069 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3070 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3071 gcc_assert ((k
& (k
- 1)) == 0);
3072 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3074 vec_alloc (ctor_elts
, k
);
3077 for (l
= 0; l
< k
; l
++)
3079 if (m
== 0 && l
== 0)
3081 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3084 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3086 arginfo
[i
].op
= vec_oprnd0
;
3089 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3093 vargs
.safe_push (vec_oprnd0
);
3096 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3098 = gimple_build_assign (make_ssa_name (atype
),
3100 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3101 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3106 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3107 vargs
.safe_push (op
);
3109 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3114 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3119 edge pe
= loop_preheader_edge (loop
);
3120 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3121 gcc_assert (!new_bb
);
3123 if (arginfo
[i
].simd_lane_linear
)
3125 vargs
.safe_push (arginfo
[i
].op
);
3128 tree phi_res
= copy_ssa_name (op
);
3129 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3130 set_vinfo_for_stmt (new_phi
,
3131 new_stmt_vec_info (new_phi
, loop_vinfo
));
3132 add_phi_arg (new_phi
, arginfo
[i
].op
,
3133 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3135 = POINTER_TYPE_P (TREE_TYPE (op
))
3136 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3137 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3138 ? sizetype
: TREE_TYPE (op
);
3140 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3142 tree tcst
= wide_int_to_tree (type
, cst
);
3143 tree phi_arg
= copy_ssa_name (op
);
3145 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3146 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3147 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3148 set_vinfo_for_stmt (new_stmt
,
3149 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3150 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3152 arginfo
[i
].op
= phi_res
;
3153 vargs
.safe_push (phi_res
);
3158 = POINTER_TYPE_P (TREE_TYPE (op
))
3159 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3160 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3161 ? sizetype
: TREE_TYPE (op
);
3163 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3165 tree tcst
= wide_int_to_tree (type
, cst
);
3166 new_temp
= make_ssa_name (TREE_TYPE (op
));
3167 new_stmt
= gimple_build_assign (new_temp
, code
,
3168 arginfo
[i
].op
, tcst
);
3169 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3170 vargs
.safe_push (new_temp
);
3173 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3179 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3182 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3184 new_temp
= create_tmp_var (ratype
);
3185 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3186 == TYPE_VECTOR_SUBPARTS (rtype
))
3187 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3189 new_temp
= make_ssa_name (rtype
, new_stmt
);
3190 gimple_call_set_lhs (new_stmt
, new_temp
);
3192 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3196 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3199 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3200 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3201 gcc_assert ((k
& (k
- 1)) == 0);
3202 for (l
= 0; l
< k
; l
++)
3207 t
= build_fold_addr_expr (new_temp
);
3208 t
= build2 (MEM_REF
, vectype
, t
,
3209 build_int_cst (TREE_TYPE (t
),
3210 l
* prec
/ BITS_PER_UNIT
));
3213 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3214 size_int (prec
), bitsize_int (l
* prec
));
3216 = gimple_build_assign (make_ssa_name (vectype
), t
);
3217 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3218 if (j
== 0 && l
== 0)
3219 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3221 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3223 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3228 tree clobber
= build_constructor (ratype
, NULL
);
3229 TREE_THIS_VOLATILE (clobber
) = 1;
3230 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3231 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3235 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3237 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3238 / TYPE_VECTOR_SUBPARTS (rtype
));
3239 gcc_assert ((k
& (k
- 1)) == 0);
3240 if ((j
& (k
- 1)) == 0)
3241 vec_alloc (ret_ctor_elts
, k
);
3244 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3245 for (m
= 0; m
< o
; m
++)
3247 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3248 size_int (m
), NULL_TREE
, NULL_TREE
);
3250 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3251 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3252 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3253 gimple_assign_lhs (new_stmt
));
3255 tree clobber
= build_constructor (ratype
, NULL
);
3256 TREE_THIS_VOLATILE (clobber
) = 1;
3257 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3258 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3261 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3262 if ((j
& (k
- 1)) != k
- 1)
3264 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3266 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3267 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3269 if ((unsigned) j
== k
- 1)
3270 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3272 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3274 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3279 tree t
= build_fold_addr_expr (new_temp
);
3280 t
= build2 (MEM_REF
, vectype
, t
,
3281 build_int_cst (TREE_TYPE (t
), 0));
3283 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3284 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3285 tree clobber
= build_constructor (ratype
, NULL
);
3286 TREE_THIS_VOLATILE (clobber
) = 1;
3287 vect_finish_stmt_generation (stmt
,
3288 gimple_build_assign (new_temp
,
3294 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3296 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3298 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3303 /* The call in STMT might prevent it from being removed in dce.
3304 We however cannot remove it here, due to the way the ssa name
3305 it defines is mapped to the new definition. So just replace
3306 rhs of the statement with something harmless. */
3313 type
= TREE_TYPE (scalar_dest
);
3314 if (is_pattern_stmt_p (stmt_info
))
3315 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3317 lhs
= gimple_call_lhs (stmt
);
3318 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3321 new_stmt
= gimple_build_nop ();
3322 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3323 set_vinfo_for_stmt (stmt
, NULL
);
3324 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3325 gsi_replace (gsi
, new_stmt
, true);
3326 unlink_stmt_vdef (stmt
);
3332 /* Function vect_gen_widened_results_half
3334 Create a vector stmt whose code, type, number of arguments, and result
3335 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3336 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3337 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3338 needs to be created (DECL is a function-decl of a target-builtin).
3339 STMT is the original scalar stmt that we are vectorizing. */
3342 vect_gen_widened_results_half (enum tree_code code
,
3344 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3345 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3351 /* Generate half of the widened result: */
3352 if (code
== CALL_EXPR
)
3354 /* Target specific support */
3355 if (op_type
== binary_op
)
3356 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3358 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3359 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3360 gimple_call_set_lhs (new_stmt
, new_temp
);
3364 /* Generic support */
3365 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3366 if (op_type
!= binary_op
)
3368 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3369 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3370 gimple_assign_set_lhs (new_stmt
, new_temp
);
3372 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3378 /* Get vectorized definitions for loop-based vectorization. For the first
3379 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3380 scalar operand), and for the rest we get a copy with
3381 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3382 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3383 The vectors are collected into VEC_OPRNDS. */
3386 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3387 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3391 /* Get first vector operand. */
3392 /* All the vector operands except the very first one (that is scalar oprnd)
3394 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3395 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3397 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3399 vec_oprnds
->quick_push (vec_oprnd
);
3401 /* Get second vector operand. */
3402 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3403 vec_oprnds
->quick_push (vec_oprnd
);
3407 /* For conversion in multiple steps, continue to get operands
3410 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3414 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3415 For multi-step conversions store the resulting vectors and call the function
3419 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3420 int multi_step_cvt
, gimple
*stmt
,
3422 gimple_stmt_iterator
*gsi
,
3423 slp_tree slp_node
, enum tree_code code
,
3424 stmt_vec_info
*prev_stmt_info
)
3427 tree vop0
, vop1
, new_tmp
, vec_dest
;
3429 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3431 vec_dest
= vec_dsts
.pop ();
3433 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3435 /* Create demotion operation. */
3436 vop0
= (*vec_oprnds
)[i
];
3437 vop1
= (*vec_oprnds
)[i
+ 1];
3438 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3439 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3440 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3441 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3444 /* Store the resulting vector for next recursive call. */
3445 (*vec_oprnds
)[i
/2] = new_tmp
;
3448 /* This is the last step of the conversion sequence. Store the
3449 vectors in SLP_NODE or in vector info of the scalar statement
3450 (or in STMT_VINFO_RELATED_STMT chain). */
3452 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3455 if (!*prev_stmt_info
)
3456 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3458 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3460 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3465 /* For multi-step demotion operations we first generate demotion operations
3466 from the source type to the intermediate types, and then combine the
3467 results (stored in VEC_OPRNDS) in demotion operation to the destination
3471 /* At each level of recursion we have half of the operands we had at the
3473 vec_oprnds
->truncate ((i
+1)/2);
3474 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3475 stmt
, vec_dsts
, gsi
, slp_node
,
3476 VEC_PACK_TRUNC_EXPR
,
3480 vec_dsts
.quick_push (vec_dest
);
3484 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3485 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3486 the resulting vectors and call the function recursively. */
3489 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3490 vec
<tree
> *vec_oprnds1
,
3491 gimple
*stmt
, tree vec_dest
,
3492 gimple_stmt_iterator
*gsi
,
3493 enum tree_code code1
,
3494 enum tree_code code2
, tree decl1
,
3495 tree decl2
, int op_type
)
3498 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3499 gimple
*new_stmt1
, *new_stmt2
;
3500 vec
<tree
> vec_tmp
= vNULL
;
3502 vec_tmp
.create (vec_oprnds0
->length () * 2);
3503 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3505 if (op_type
== binary_op
)
3506 vop1
= (*vec_oprnds1
)[i
];
3510 /* Generate the two halves of promotion operation. */
3511 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3512 op_type
, vec_dest
, gsi
, stmt
);
3513 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3514 op_type
, vec_dest
, gsi
, stmt
);
3515 if (is_gimple_call (new_stmt1
))
3517 new_tmp1
= gimple_call_lhs (new_stmt1
);
3518 new_tmp2
= gimple_call_lhs (new_stmt2
);
3522 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3523 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3526 /* Store the results for the next step. */
3527 vec_tmp
.quick_push (new_tmp1
);
3528 vec_tmp
.quick_push (new_tmp2
);
3531 vec_oprnds0
->release ();
3532 *vec_oprnds0
= vec_tmp
;
3536 /* Check if STMT performs a conversion operation, that can be vectorized.
3537 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3538 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3539 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3542 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3543 gimple
**vec_stmt
, slp_tree slp_node
)
3547 tree op0
, op1
= NULL_TREE
;
3548 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3549 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3550 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3551 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3552 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3553 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3557 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3558 gimple
*new_stmt
= NULL
;
3559 stmt_vec_info prev_stmt_info
;
3562 tree vectype_out
, vectype_in
;
3564 tree lhs_type
, rhs_type
;
3565 enum { NARROW
, NONE
, WIDEN
} modifier
;
3566 vec
<tree
> vec_oprnds0
= vNULL
;
3567 vec
<tree
> vec_oprnds1
= vNULL
;
3569 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3570 vec_info
*vinfo
= stmt_info
->vinfo
;
3571 int multi_step_cvt
= 0;
3572 vec
<tree
> vec_dsts
= vNULL
;
3573 vec
<tree
> interm_types
= vNULL
;
3574 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3576 machine_mode rhs_mode
;
3577 unsigned short fltsz
;
3579 /* Is STMT a vectorizable conversion? */
3581 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3584 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3587 if (!is_gimple_assign (stmt
))
3590 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3593 code
= gimple_assign_rhs_code (stmt
);
3594 if (!CONVERT_EXPR_CODE_P (code
)
3595 && code
!= FIX_TRUNC_EXPR
3596 && code
!= FLOAT_EXPR
3597 && code
!= WIDEN_MULT_EXPR
3598 && code
!= WIDEN_LSHIFT_EXPR
)
3601 op_type
= TREE_CODE_LENGTH (code
);
3603 /* Check types of lhs and rhs. */
3604 scalar_dest
= gimple_assign_lhs (stmt
);
3605 lhs_type
= TREE_TYPE (scalar_dest
);
3606 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3608 op0
= gimple_assign_rhs1 (stmt
);
3609 rhs_type
= TREE_TYPE (op0
);
3611 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3612 && !((INTEGRAL_TYPE_P (lhs_type
)
3613 && INTEGRAL_TYPE_P (rhs_type
))
3614 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3615 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3618 if ((INTEGRAL_TYPE_P (lhs_type
)
3619 && (TYPE_PRECISION (lhs_type
)
3620 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3621 || (INTEGRAL_TYPE_P (rhs_type
)
3622 && (TYPE_PRECISION (rhs_type
)
3623 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3625 if (dump_enabled_p ())
3626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3627 "type conversion to/from bit-precision unsupported."
3632 /* Check the operands of the operation. */
3633 if (!vect_is_simple_use_1 (op0
, stmt
, vinfo
,
3634 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3636 if (dump_enabled_p ())
3637 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3638 "use not simple.\n");
3641 if (op_type
== binary_op
)
3645 op1
= gimple_assign_rhs2 (stmt
);
3646 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3647 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3649 if (CONSTANT_CLASS_P (op0
))
3650 ok
= vect_is_simple_use_1 (op1
, stmt
, vinfo
,
3651 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3653 ok
= vect_is_simple_use (op1
, stmt
, vinfo
, &def_stmt
,
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3660 "use not simple.\n");
3665 /* If op0 is an external or constant defs use a vector type of
3666 the same size as the output vector type. */
3668 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3670 gcc_assert (vectype_in
);
3673 if (dump_enabled_p ())
3675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3676 "no vectype for scalar type ");
3677 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3678 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3684 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3685 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3686 if (nunits_in
< nunits_out
)
3688 else if (nunits_out
== nunits_in
)
3693 /* Multiple types in SLP are handled by creating the appropriate number of
3694 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3696 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3698 else if (modifier
== NARROW
)
3699 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3701 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3703 /* Sanity check: make sure that at least one copy of the vectorized stmt
3704 needs to be generated. */
3705 gcc_assert (ncopies
>= 1);
3707 /* Supportable by target? */
3711 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3713 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3718 if (dump_enabled_p ())
3719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3720 "conversion not supported by target.\n");
3724 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3725 &code1
, &code2
, &multi_step_cvt
,
3728 /* Binary widening operation can only be supported directly by the
3730 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3734 if (code
!= FLOAT_EXPR
3735 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3736 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3739 rhs_mode
= TYPE_MODE (rhs_type
);
3740 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3741 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3742 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3743 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3746 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3747 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3748 if (cvt_type
== NULL_TREE
)
3751 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3753 if (!supportable_convert_operation (code
, vectype_out
,
3754 cvt_type
, &decl1
, &codecvt1
))
3757 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3758 cvt_type
, &codecvt1
,
3759 &codecvt2
, &multi_step_cvt
,
3763 gcc_assert (multi_step_cvt
== 0);
3765 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3766 vectype_in
, &code1
, &code2
,
3767 &multi_step_cvt
, &interm_types
))
3771 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3774 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3775 codecvt2
= ERROR_MARK
;
3779 interm_types
.safe_push (cvt_type
);
3780 cvt_type
= NULL_TREE
;
3785 gcc_assert (op_type
== unary_op
);
3786 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3787 &code1
, &multi_step_cvt
,
3791 if (code
!= FIX_TRUNC_EXPR
3792 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3793 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3796 rhs_mode
= TYPE_MODE (rhs_type
);
3798 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3799 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3800 if (cvt_type
== NULL_TREE
)
3802 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3805 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3806 &code1
, &multi_step_cvt
,
3815 if (!vec_stmt
) /* transformation not required. */
3817 if (dump_enabled_p ())
3818 dump_printf_loc (MSG_NOTE
, vect_location
,
3819 "=== vectorizable_conversion ===\n");
3820 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3822 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3823 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3825 else if (modifier
== NARROW
)
3827 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3828 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3832 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3833 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3835 interm_types
.release ();
3840 if (dump_enabled_p ())
3841 dump_printf_loc (MSG_NOTE
, vect_location
,
3842 "transform conversion. ncopies = %d.\n", ncopies
);
3844 if (op_type
== binary_op
)
3846 if (CONSTANT_CLASS_P (op0
))
3847 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3848 else if (CONSTANT_CLASS_P (op1
))
3849 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3852 /* In case of multi-step conversion, we first generate conversion operations
3853 to the intermediate types, and then from that types to the final one.
3854 We create vector destinations for the intermediate type (TYPES) received
3855 from supportable_*_operation, and store them in the correct order
3856 for future use in vect_create_vectorized_*_stmts (). */
3857 vec_dsts
.create (multi_step_cvt
+ 1);
3858 vec_dest
= vect_create_destination_var (scalar_dest
,
3859 (cvt_type
&& modifier
== WIDEN
)
3860 ? cvt_type
: vectype_out
);
3861 vec_dsts
.quick_push (vec_dest
);
3865 for (i
= interm_types
.length () - 1;
3866 interm_types
.iterate (i
, &intermediate_type
); i
--)
3868 vec_dest
= vect_create_destination_var (scalar_dest
,
3870 vec_dsts
.quick_push (vec_dest
);
3875 vec_dest
= vect_create_destination_var (scalar_dest
,
3877 ? vectype_out
: cvt_type
);
3881 if (modifier
== WIDEN
)
3883 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3884 if (op_type
== binary_op
)
3885 vec_oprnds1
.create (1);
3887 else if (modifier
== NARROW
)
3888 vec_oprnds0
.create (
3889 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3891 else if (code
== WIDEN_LSHIFT_EXPR
)
3892 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3895 prev_stmt_info
= NULL
;
3899 for (j
= 0; j
< ncopies
; j
++)
3902 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3905 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3907 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3909 /* Arguments are ready, create the new vector stmt. */
3910 if (code1
== CALL_EXPR
)
3912 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3913 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3914 gimple_call_set_lhs (new_stmt
, new_temp
);
3918 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3919 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3920 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3921 gimple_assign_set_lhs (new_stmt
, new_temp
);
3924 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3926 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3929 if (!prev_stmt_info
)
3930 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3932 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3933 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3940 /* In case the vectorization factor (VF) is bigger than the number
3941 of elements that we can fit in a vectype (nunits), we have to
3942 generate more than one vector stmt - i.e - we need to "unroll"
3943 the vector stmt by a factor VF/nunits. */
3944 for (j
= 0; j
< ncopies
; j
++)
3951 if (code
== WIDEN_LSHIFT_EXPR
)
3956 /* Store vec_oprnd1 for every vector stmt to be created
3957 for SLP_NODE. We check during the analysis that all
3958 the shift arguments are the same. */
3959 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3960 vec_oprnds1
.quick_push (vec_oprnd1
);
3962 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3966 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3967 &vec_oprnds1
, slp_node
, -1);
3971 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3972 vec_oprnds0
.quick_push (vec_oprnd0
);
3973 if (op_type
== binary_op
)
3975 if (code
== WIDEN_LSHIFT_EXPR
)
3978 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3980 vec_oprnds1
.quick_push (vec_oprnd1
);
3986 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3987 vec_oprnds0
.truncate (0);
3988 vec_oprnds0
.quick_push (vec_oprnd0
);
3989 if (op_type
== binary_op
)
3991 if (code
== WIDEN_LSHIFT_EXPR
)
3994 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3996 vec_oprnds1
.truncate (0);
3997 vec_oprnds1
.quick_push (vec_oprnd1
);
4001 /* Arguments are ready. Create the new vector stmts. */
4002 for (i
= multi_step_cvt
; i
>= 0; i
--)
4004 tree this_dest
= vec_dsts
[i
];
4005 enum tree_code c1
= code1
, c2
= code2
;
4006 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4011 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4013 stmt
, this_dest
, gsi
,
4014 c1
, c2
, decl1
, decl2
,
4018 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4022 if (codecvt1
== CALL_EXPR
)
4024 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4025 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4026 gimple_call_set_lhs (new_stmt
, new_temp
);
4030 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4031 new_temp
= make_ssa_name (vec_dest
);
4032 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4036 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4039 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4042 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4045 if (!prev_stmt_info
)
4046 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4048 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4049 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4054 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4058 /* In case the vectorization factor (VF) is bigger than the number
4059 of elements that we can fit in a vectype (nunits), we have to
4060 generate more than one vector stmt - i.e - we need to "unroll"
4061 the vector stmt by a factor VF/nunits. */
4062 for (j
= 0; j
< ncopies
; j
++)
4066 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4070 vec_oprnds0
.truncate (0);
4071 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4072 vect_pow2 (multi_step_cvt
) - 1);
4075 /* Arguments are ready. Create the new vector stmts. */
4077 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4079 if (codecvt1
== CALL_EXPR
)
4081 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4083 gimple_call_set_lhs (new_stmt
, new_temp
);
4087 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4088 new_temp
= make_ssa_name (vec_dest
);
4089 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4093 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4094 vec_oprnds0
[i
] = new_temp
;
4097 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4098 stmt
, vec_dsts
, gsi
,
4103 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4107 vec_oprnds0
.release ();
4108 vec_oprnds1
.release ();
4109 vec_dsts
.release ();
4110 interm_types
.release ();
4116 /* Function vectorizable_assignment.
4118 Check if STMT performs an assignment (copy) that can be vectorized.
4119 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4120 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4121 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4124 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4125 gimple
**vec_stmt
, slp_tree slp_node
)
4130 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4131 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4135 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4138 vec
<tree
> vec_oprnds
= vNULL
;
4140 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4141 vec_info
*vinfo
= stmt_info
->vinfo
;
4142 gimple
*new_stmt
= NULL
;
4143 stmt_vec_info prev_stmt_info
= NULL
;
4144 enum tree_code code
;
4147 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4150 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4153 /* Is vectorizable assignment? */
4154 if (!is_gimple_assign (stmt
))
4157 scalar_dest
= gimple_assign_lhs (stmt
);
4158 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4161 code
= gimple_assign_rhs_code (stmt
);
4162 if (gimple_assign_single_p (stmt
)
4163 || code
== PAREN_EXPR
4164 || CONVERT_EXPR_CODE_P (code
))
4165 op
= gimple_assign_rhs1 (stmt
);
4169 if (code
== VIEW_CONVERT_EXPR
)
4170 op
= TREE_OPERAND (op
, 0);
4172 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4173 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4175 /* Multiple types in SLP are handled by creating the appropriate number of
4176 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4178 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4181 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4183 gcc_assert (ncopies
>= 1);
4185 if (!vect_is_simple_use_1 (op
, stmt
, vinfo
,
4186 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4188 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4190 "use not simple.\n");
4194 /* We can handle NOP_EXPR conversions that do not change the number
4195 of elements or the vector size. */
4196 if ((CONVERT_EXPR_CODE_P (code
)
4197 || code
== VIEW_CONVERT_EXPR
)
4199 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4200 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4201 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4204 /* We do not handle bit-precision changes. */
4205 if ((CONVERT_EXPR_CODE_P (code
)
4206 || code
== VIEW_CONVERT_EXPR
)
4207 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4208 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4209 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4210 || ((TYPE_PRECISION (TREE_TYPE (op
))
4211 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4212 /* But a conversion that does not change the bit-pattern is ok. */
4213 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4214 > TYPE_PRECISION (TREE_TYPE (op
)))
4215 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4217 if (dump_enabled_p ())
4218 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4219 "type conversion to/from bit-precision "
4224 if (!vec_stmt
) /* transformation not required. */
4226 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4227 if (dump_enabled_p ())
4228 dump_printf_loc (MSG_NOTE
, vect_location
,
4229 "=== vectorizable_assignment ===\n");
4230 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4235 if (dump_enabled_p ())
4236 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4239 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4242 for (j
= 0; j
< ncopies
; j
++)
4246 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4248 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4250 /* Arguments are ready. create the new vector stmt. */
4251 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4253 if (CONVERT_EXPR_CODE_P (code
)
4254 || code
== VIEW_CONVERT_EXPR
)
4255 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4256 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4257 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4258 gimple_assign_set_lhs (new_stmt
, new_temp
);
4259 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4261 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4268 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4270 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4272 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4275 vec_oprnds
.release ();
4280 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4281 either as shift by a scalar or by a vector. */
4284 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4287 machine_mode vec_mode
;
4292 vectype
= get_vectype_for_scalar_type (scalar_type
);
4296 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4298 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4300 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4302 || (optab_handler (optab
, TYPE_MODE (vectype
))
4303 == CODE_FOR_nothing
))
4307 vec_mode
= TYPE_MODE (vectype
);
4308 icode
= (int) optab_handler (optab
, vec_mode
);
4309 if (icode
== CODE_FOR_nothing
)
4316 /* Function vectorizable_shift.
4318 Check if STMT performs a shift operation that can be vectorized.
4319 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4320 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4321 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4324 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4325 gimple
**vec_stmt
, slp_tree slp_node
)
4329 tree op0
, op1
= NULL
;
4330 tree vec_oprnd1
= NULL_TREE
;
4331 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4333 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4334 enum tree_code code
;
4335 machine_mode vec_mode
;
4339 machine_mode optab_op2_mode
;
4342 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4343 gimple
*new_stmt
= NULL
;
4344 stmt_vec_info prev_stmt_info
;
4351 vec
<tree
> vec_oprnds0
= vNULL
;
4352 vec
<tree
> vec_oprnds1
= vNULL
;
4355 bool scalar_shift_arg
= true;
4356 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4357 vec_info
*vinfo
= stmt_info
->vinfo
;
4360 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4363 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4366 /* Is STMT a vectorizable binary/unary operation? */
4367 if (!is_gimple_assign (stmt
))
4370 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4373 code
= gimple_assign_rhs_code (stmt
);
4375 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4376 || code
== RROTATE_EXPR
))
4379 scalar_dest
= gimple_assign_lhs (stmt
);
4380 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4381 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4382 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4384 if (dump_enabled_p ())
4385 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4386 "bit-precision shifts not supported.\n");
4390 op0
= gimple_assign_rhs1 (stmt
);
4391 if (!vect_is_simple_use_1 (op0
, stmt
, vinfo
,
4392 &def_stmt
, &def
, &dt
[0], &vectype
))
4394 if (dump_enabled_p ())
4395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4396 "use not simple.\n");
4399 /* If op0 is an external or constant def use a vector type with
4400 the same size as the output vector type. */
4402 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4404 gcc_assert (vectype
);
4407 if (dump_enabled_p ())
4408 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4409 "no vectype for scalar type\n");
4413 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4414 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4415 if (nunits_out
!= nunits_in
)
4418 op1
= gimple_assign_rhs2 (stmt
);
4419 if (!vect_is_simple_use_1 (op1
, stmt
, vinfo
, &def_stmt
,
4420 &def
, &dt
[1], &op1_vectype
))
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4424 "use not simple.\n");
4429 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4433 /* Multiple types in SLP are handled by creating the appropriate number of
4434 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4436 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4439 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4441 gcc_assert (ncopies
>= 1);
4443 /* Determine whether the shift amount is a vector, or scalar. If the
4444 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4446 if ((dt
[1] == vect_internal_def
4447 || dt
[1] == vect_induction_def
)
4449 scalar_shift_arg
= false;
4450 else if (dt
[1] == vect_constant_def
4451 || dt
[1] == vect_external_def
4452 || dt
[1] == vect_internal_def
)
4454 /* In SLP, need to check whether the shift count is the same,
4455 in loops if it is a constant or invariant, it is always
4459 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4462 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4463 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4464 scalar_shift_arg
= false;
4469 if (dump_enabled_p ())
4470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4471 "operand mode requires invariant argument.\n");
4475 /* Vector shifted by vector. */
4476 if (!scalar_shift_arg
)
4478 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_NOTE
, vect_location
,
4481 "vector/vector shift/rotate found.\n");
4484 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4485 if (op1_vectype
== NULL_TREE
4486 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4490 "unusable type for last operand in"
4491 " vector/vector shift/rotate.\n");
4495 /* See if the machine has a vector shifted by scalar insn and if not
4496 then see if it has a vector shifted by vector insn. */
4499 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4501 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4503 if (dump_enabled_p ())
4504 dump_printf_loc (MSG_NOTE
, vect_location
,
4505 "vector/scalar shift/rotate found.\n");
4509 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4511 && (optab_handler (optab
, TYPE_MODE (vectype
))
4512 != CODE_FOR_nothing
))
4514 scalar_shift_arg
= false;
4516 if (dump_enabled_p ())
4517 dump_printf_loc (MSG_NOTE
, vect_location
,
4518 "vector/vector shift/rotate found.\n");
4520 /* Unlike the other binary operators, shifts/rotates have
4521 the rhs being int, instead of the same type as the lhs,
4522 so make sure the scalar is the right type if we are
4523 dealing with vectors of long long/long/short/char. */
4524 if (dt
[1] == vect_constant_def
)
4525 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4526 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4530 && TYPE_MODE (TREE_TYPE (vectype
))
4531 != TYPE_MODE (TREE_TYPE (op1
)))
4533 if (dump_enabled_p ())
4534 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4535 "unusable type for last operand in"
4536 " vector/vector shift/rotate.\n");
4539 if (vec_stmt
&& !slp_node
)
4541 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4542 op1
= vect_init_vector (stmt
, op1
,
4543 TREE_TYPE (vectype
), NULL
);
4550 /* Supportable by target? */
4553 if (dump_enabled_p ())
4554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4558 vec_mode
= TYPE_MODE (vectype
);
4559 icode
= (int) optab_handler (optab
, vec_mode
);
4560 if (icode
== CODE_FOR_nothing
)
4562 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4564 "op not supported by target.\n");
4565 /* Check only during analysis. */
4566 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4567 || (vf
< vect_min_worthwhile_factor (code
)
4570 if (dump_enabled_p ())
4571 dump_printf_loc (MSG_NOTE
, vect_location
,
4572 "proceeding using word mode.\n");
4575 /* Worthwhile without SIMD support? Check only during analysis. */
4576 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4577 && vf
< vect_min_worthwhile_factor (code
)
4580 if (dump_enabled_p ())
4581 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4582 "not worthwhile without SIMD support.\n");
4586 if (!vec_stmt
) /* transformation not required. */
4588 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_NOTE
, vect_location
,
4591 "=== vectorizable_shift ===\n");
4592 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4598 if (dump_enabled_p ())
4599 dump_printf_loc (MSG_NOTE
, vect_location
,
4600 "transform binary/unary operation.\n");
4603 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4605 prev_stmt_info
= NULL
;
4606 for (j
= 0; j
< ncopies
; j
++)
4611 if (scalar_shift_arg
)
4613 /* Vector shl and shr insn patterns can be defined with scalar
4614 operand 2 (shift operand). In this case, use constant or loop
4615 invariant op1 directly, without extending it to vector mode
4617 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4618 if (!VECTOR_MODE_P (optab_op2_mode
))
4620 if (dump_enabled_p ())
4621 dump_printf_loc (MSG_NOTE
, vect_location
,
4622 "operand 1 using scalar mode.\n");
4624 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4625 vec_oprnds1
.quick_push (vec_oprnd1
);
4628 /* Store vec_oprnd1 for every vector stmt to be created
4629 for SLP_NODE. We check during the analysis that all
4630 the shift arguments are the same.
4631 TODO: Allow different constants for different vector
4632 stmts generated for an SLP instance. */
4633 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4634 vec_oprnds1
.quick_push (vec_oprnd1
);
4639 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4640 (a special case for certain kind of vector shifts); otherwise,
4641 operand 1 should be of a vector type (the usual case). */
4643 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4646 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4650 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4652 /* Arguments are ready. Create the new vector stmt. */
4653 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4655 vop1
= vec_oprnds1
[i
];
4656 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4657 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4658 gimple_assign_set_lhs (new_stmt
, new_temp
);
4659 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4661 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4668 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4670 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4671 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4674 vec_oprnds0
.release ();
4675 vec_oprnds1
.release ();
4681 /* Function vectorizable_operation.
4683 Check if STMT performs a binary, unary or ternary operation that can
4685 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4686 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4687 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4690 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4691 gimple
**vec_stmt
, slp_tree slp_node
)
4695 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4696 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4698 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4699 enum tree_code code
;
4700 machine_mode vec_mode
;
4704 bool target_support_p
;
4707 enum vect_def_type dt
[3]
4708 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4709 gimple
*new_stmt
= NULL
;
4710 stmt_vec_info prev_stmt_info
;
4716 vec
<tree
> vec_oprnds0
= vNULL
;
4717 vec
<tree
> vec_oprnds1
= vNULL
;
4718 vec
<tree
> vec_oprnds2
= vNULL
;
4719 tree vop0
, vop1
, vop2
;
4720 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4721 vec_info
*vinfo
= stmt_info
->vinfo
;
4724 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4727 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4730 /* Is STMT a vectorizable binary/unary operation? */
4731 if (!is_gimple_assign (stmt
))
4734 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4737 code
= gimple_assign_rhs_code (stmt
);
4739 /* For pointer addition, we should use the normal plus for
4740 the vector addition. */
4741 if (code
== POINTER_PLUS_EXPR
)
4744 /* Support only unary or binary operations. */
4745 op_type
= TREE_CODE_LENGTH (code
);
4746 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4748 if (dump_enabled_p ())
4749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4750 "num. args = %d (not unary/binary/ternary op).\n",
4755 scalar_dest
= gimple_assign_lhs (stmt
);
4756 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4758 /* Most operations cannot handle bit-precision types without extra
4760 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4761 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4762 /* Exception are bitwise binary operations. */
4763 && code
!= BIT_IOR_EXPR
4764 && code
!= BIT_XOR_EXPR
4765 && code
!= BIT_AND_EXPR
)
4767 if (dump_enabled_p ())
4768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4769 "bit-precision arithmetic not supported.\n");
4773 op0
= gimple_assign_rhs1 (stmt
);
4774 if (!vect_is_simple_use_1 (op0
, stmt
, vinfo
,
4775 &def_stmt
, &def
, &dt
[0], &vectype
))
4777 if (dump_enabled_p ())
4778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4779 "use not simple.\n");
4782 /* If op0 is an external or constant def use a vector type with
4783 the same size as the output vector type. */
4785 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4787 gcc_assert (vectype
);
4790 if (dump_enabled_p ())
4792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4793 "no vectype for scalar type ");
4794 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4796 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4802 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4803 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4804 if (nunits_out
!= nunits_in
)
4807 if (op_type
== binary_op
|| op_type
== ternary_op
)
4809 op1
= gimple_assign_rhs2 (stmt
);
4810 if (!vect_is_simple_use (op1
, stmt
, vinfo
, &def_stmt
,
4813 if (dump_enabled_p ())
4814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4815 "use not simple.\n");
4819 if (op_type
== ternary_op
)
4821 op2
= gimple_assign_rhs3 (stmt
);
4822 if (!vect_is_simple_use (op2
, stmt
, vinfo
, &def_stmt
,
4825 if (dump_enabled_p ())
4826 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4827 "use not simple.\n");
4833 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4837 /* Multiple types in SLP are handled by creating the appropriate number of
4838 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4840 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4843 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4845 gcc_assert (ncopies
>= 1);
4847 /* Shifts are handled in vectorizable_shift (). */
4848 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4849 || code
== RROTATE_EXPR
)
4852 /* Supportable by target? */
4854 vec_mode
= TYPE_MODE (vectype
);
4855 if (code
== MULT_HIGHPART_EXPR
)
4856 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
4859 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4862 if (dump_enabled_p ())
4863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4867 target_support_p
= (optab_handler (optab
, vec_mode
)
4868 != CODE_FOR_nothing
);
4871 if (!target_support_p
)
4873 if (dump_enabled_p ())
4874 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4875 "op not supported by target.\n");
4876 /* Check only during analysis. */
4877 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4878 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4880 if (dump_enabled_p ())
4881 dump_printf_loc (MSG_NOTE
, vect_location
,
4882 "proceeding using word mode.\n");
4885 /* Worthwhile without SIMD support? Check only during analysis. */
4886 if (!VECTOR_MODE_P (vec_mode
)
4888 && vf
< vect_min_worthwhile_factor (code
))
4890 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4892 "not worthwhile without SIMD support.\n");
4896 if (!vec_stmt
) /* transformation not required. */
4898 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_NOTE
, vect_location
,
4901 "=== vectorizable_operation ===\n");
4902 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4908 if (dump_enabled_p ())
4909 dump_printf_loc (MSG_NOTE
, vect_location
,
4910 "transform binary/unary operation.\n");
4913 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4915 /* In case the vectorization factor (VF) is bigger than the number
4916 of elements that we can fit in a vectype (nunits), we have to generate
4917 more than one vector stmt - i.e - we need to "unroll" the
4918 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4919 from one copy of the vector stmt to the next, in the field
4920 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4921 stages to find the correct vector defs to be used when vectorizing
4922 stmts that use the defs of the current stmt. The example below
4923 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4924 we need to create 4 vectorized stmts):
4926 before vectorization:
4927 RELATED_STMT VEC_STMT
4931 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4933 RELATED_STMT VEC_STMT
4934 VS1_0: vx0 = memref0 VS1_1 -
4935 VS1_1: vx1 = memref1 VS1_2 -
4936 VS1_2: vx2 = memref2 VS1_3 -
4937 VS1_3: vx3 = memref3 - -
4938 S1: x = load - VS1_0
4941 step2: vectorize stmt S2 (done here):
4942 To vectorize stmt S2 we first need to find the relevant vector
4943 def for the first operand 'x'. This is, as usual, obtained from
4944 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4945 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4946 relevant vector def 'vx0'. Having found 'vx0' we can generate
4947 the vector stmt VS2_0, and as usual, record it in the
4948 STMT_VINFO_VEC_STMT of stmt S2.
4949 When creating the second copy (VS2_1), we obtain the relevant vector
4950 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4951 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4952 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4953 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4954 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4955 chain of stmts and pointers:
4956 RELATED_STMT VEC_STMT
4957 VS1_0: vx0 = memref0 VS1_1 -
4958 VS1_1: vx1 = memref1 VS1_2 -
4959 VS1_2: vx2 = memref2 VS1_3 -
4960 VS1_3: vx3 = memref3 - -
4961 S1: x = load - VS1_0
4962 VS2_0: vz0 = vx0 + v1 VS2_1 -
4963 VS2_1: vz1 = vx1 + v1 VS2_2 -
4964 VS2_2: vz2 = vx2 + v1 VS2_3 -
4965 VS2_3: vz3 = vx3 + v1 - -
4966 S2: z = x + 1 - VS2_0 */
4968 prev_stmt_info
= NULL
;
4969 for (j
= 0; j
< ncopies
; j
++)
4974 if (op_type
== binary_op
|| op_type
== ternary_op
)
4975 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4978 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4980 if (op_type
== ternary_op
)
4982 vec_oprnds2
.create (1);
4983 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4990 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4991 if (op_type
== ternary_op
)
4993 tree vec_oprnd
= vec_oprnds2
.pop ();
4994 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4999 /* Arguments are ready. Create the new vector stmt. */
5000 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5002 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5003 ? vec_oprnds1
[i
] : NULL_TREE
);
5004 vop2
= ((op_type
== ternary_op
)
5005 ? vec_oprnds2
[i
] : NULL_TREE
);
5006 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5007 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5008 gimple_assign_set_lhs (new_stmt
, new_temp
);
5009 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5011 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5018 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5020 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5021 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5024 vec_oprnds0
.release ();
5025 vec_oprnds1
.release ();
5026 vec_oprnds2
.release ();
5031 /* A helper function to ensure data reference DR's base alignment
5035 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5040 if (DR_VECT_AUX (dr
)->base_misaligned
)
5042 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5043 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5045 if (decl_in_symtab_p (base_decl
))
5046 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5049 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
5050 DECL_USER_ALIGN (base_decl
) = 1;
5052 DR_VECT_AUX (dr
)->base_misaligned
= false;
5057 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5058 reversal of the vector elements. If that is impossible to do,
5062 perm_mask_for_reverse (tree vectype
)
5067 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5068 sel
= XALLOCAVEC (unsigned char, nunits
);
5070 for (i
= 0; i
< nunits
; ++i
)
5071 sel
[i
] = nunits
- 1 - i
;
5073 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5075 return vect_gen_perm_mask_checked (vectype
, sel
);
5078 /* Function vectorizable_store.
5080 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5082 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5083 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5084 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5087 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5093 tree vec_oprnd
= NULL_TREE
;
5094 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5095 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5097 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5098 struct loop
*loop
= NULL
;
5099 machine_mode vec_mode
;
5101 enum dr_alignment_support alignment_support_scheme
;
5104 enum vect_def_type dt
;
5105 stmt_vec_info prev_stmt_info
= NULL
;
5106 tree dataref_ptr
= NULL_TREE
;
5107 tree dataref_offset
= NULL_TREE
;
5108 gimple
*ptr_incr
= NULL
;
5111 gimple
*next_stmt
, *first_stmt
= NULL
;
5112 bool grouped_store
= false;
5113 bool store_lanes_p
= false;
5114 unsigned int group_size
, i
;
5115 vec
<tree
> dr_chain
= vNULL
;
5116 vec
<tree
> oprnds
= vNULL
;
5117 vec
<tree
> result_chain
= vNULL
;
5119 bool negative
= false;
5120 tree offset
= NULL_TREE
;
5121 vec
<tree
> vec_oprnds
= vNULL
;
5122 bool slp
= (slp_node
!= NULL
);
5123 unsigned int vec_num
;
5124 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5125 vec_info
*vinfo
= stmt_info
->vinfo
;
5127 tree scatter_base
= NULL_TREE
, scatter_off
= NULL_TREE
;
5128 tree scatter_off_vectype
= NULL_TREE
, scatter_decl
= NULL_TREE
;
5129 int scatter_scale
= 1;
5130 enum vect_def_type scatter_idx_dt
= vect_unknown_def_type
;
5131 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5134 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5137 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5140 /* Is vectorizable store? */
5142 if (!is_gimple_assign (stmt
))
5145 scalar_dest
= gimple_assign_lhs (stmt
);
5146 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5147 && is_pattern_stmt_p (stmt_info
))
5148 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5149 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5150 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5151 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5152 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5153 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5154 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5155 && TREE_CODE (scalar_dest
) != MEM_REF
)
5158 gcc_assert (gimple_assign_single_p (stmt
));
5160 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5161 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5164 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5166 /* Multiple types in SLP are handled by creating the appropriate number of
5167 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5169 if (slp
|| PURE_SLP_STMT (stmt_info
))
5172 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5174 gcc_assert (ncopies
>= 1);
5176 /* FORNOW. This restriction should be relaxed. */
5177 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5179 if (dump_enabled_p ())
5180 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5181 "multiple types in nested loop.\n");
5185 op
= gimple_assign_rhs1 (stmt
);
5186 if (!vect_is_simple_use (op
, stmt
, vinfo
, &def_stmt
,
5189 if (dump_enabled_p ())
5190 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5191 "use not simple.\n");
5195 elem_type
= TREE_TYPE (vectype
);
5196 vec_mode
= TYPE_MODE (vectype
);
5198 /* FORNOW. In some cases can vectorize even if data-type not supported
5199 (e.g. - array initialization with 0). */
5200 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5203 if (!STMT_VINFO_DATA_REF (stmt_info
))
5206 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5209 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5210 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5211 size_zero_node
) < 0;
5212 if (negative
&& ncopies
> 1)
5214 if (dump_enabled_p ())
5215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5216 "multiple types with negative step.\n");
5221 gcc_assert (!grouped_store
);
5222 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5223 if (alignment_support_scheme
!= dr_aligned
5224 && alignment_support_scheme
!= dr_unaligned_supported
)
5226 if (dump_enabled_p ())
5227 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5228 "negative step but alignment required.\n");
5231 if (dt
!= vect_constant_def
5232 && dt
!= vect_external_def
5233 && !perm_mask_for_reverse (vectype
))
5235 if (dump_enabled_p ())
5236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5237 "negative step and reversing not supported.\n");
5243 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5245 grouped_store
= true;
5246 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5247 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5249 && !PURE_SLP_STMT (stmt_info
)
5250 && !STMT_VINFO_STRIDED_P (stmt_info
))
5252 if (vect_store_lanes_supported (vectype
, group_size
))
5253 store_lanes_p
= true;
5254 else if (!vect_grouped_store_supported (vectype
, group_size
))
5258 if (STMT_VINFO_STRIDED_P (stmt_info
)
5259 && (slp
|| PURE_SLP_STMT (stmt_info
))
5260 && (group_size
> nunits
5261 || nunits
% group_size
!= 0))
5263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5264 "unhandled strided group store\n");
5268 if (first_stmt
== stmt
)
5270 /* STMT is the leader of the group. Check the operands of all the
5271 stmts of the group. */
5272 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5275 gcc_assert (gimple_assign_single_p (next_stmt
));
5276 op
= gimple_assign_rhs1 (next_stmt
);
5277 if (!vect_is_simple_use (op
, next_stmt
, vinfo
,
5278 &def_stmt
, &def
, &dt
))
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5282 "use not simple.\n");
5285 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5290 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5294 scatter_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &scatter_base
,
5295 &scatter_off
, &scatter_scale
);
5296 gcc_assert (scatter_decl
);
5297 if (!vect_is_simple_use_1 (scatter_off
, NULL
, vinfo
,
5298 &def_stmt
, &def
, &scatter_idx_dt
,
5299 &scatter_off_vectype
))
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5303 "scatter index use not simple.");
5308 if (!vec_stmt
) /* transformation not required. */
5310 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5311 /* The SLP costs are calculated during SLP analysis. */
5312 if (!PURE_SLP_STMT (stmt_info
))
5313 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5320 ensure_base_align (stmt_info
, dr
);
5322 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5324 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5325 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (scatter_decl
));
5326 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5327 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5328 edge pe
= loop_preheader_edge (loop
);
5331 enum { NARROW
, NONE
, WIDEN
} modifier
;
5332 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (scatter_off_vectype
);
5334 if (nunits
== (unsigned int) scatter_off_nunits
)
5336 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5338 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5341 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5342 sel
[i
] = i
| nunits
;
5344 perm_mask
= vect_gen_perm_mask_checked (scatter_off_vectype
, sel
);
5345 gcc_assert (perm_mask
!= NULL_TREE
);
5347 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5349 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5352 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5353 sel
[i
] = i
| scatter_off_nunits
;
5355 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5356 gcc_assert (perm_mask
!= NULL_TREE
);
5362 rettype
= TREE_TYPE (TREE_TYPE (scatter_decl
));
5363 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5364 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5365 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5366 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5367 scaletype
= TREE_VALUE (arglist
);
5369 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5370 && TREE_CODE (rettype
) == VOID_TYPE
);
5372 ptr
= fold_convert (ptrtype
, scatter_base
);
5373 if (!is_gimple_min_invariant (ptr
))
5375 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5376 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5377 gcc_assert (!new_bb
);
5380 /* Currently we support only unconditional scatter stores,
5381 so mask should be all ones. */
5382 mask
= build_int_cst (masktype
, -1);
5383 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5385 scale
= build_int_cst (scaletype
, scatter_scale
);
5387 prev_stmt_info
= NULL
;
5388 for (j
= 0; j
< ncopies
; ++j
)
5393 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
, NULL
);
5395 = vect_get_vec_def_for_operand (scatter_off
, stmt
, NULL
);
5397 else if (modifier
!= NONE
&& (j
& 1))
5399 if (modifier
== WIDEN
)
5402 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5403 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5406 else if (modifier
== NARROW
)
5408 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5411 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5419 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5421 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5424 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5426 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5427 == TYPE_VECTOR_SUBPARTS (srctype
));
5428 var
= vect_get_new_vect_var (srctype
, vect_simple_var
, NULL
);
5429 var
= make_ssa_name (var
);
5430 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5431 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5432 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5436 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5438 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5439 == TYPE_VECTOR_SUBPARTS (idxtype
));
5440 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5441 var
= make_ssa_name (var
);
5442 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5443 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5444 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5449 = gimple_build_call (scatter_decl
, 5, ptr
, mask
, op
, src
, scale
);
5451 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5453 if (prev_stmt_info
== NULL
)
5454 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5456 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5457 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5464 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5465 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5467 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5470 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5472 /* We vectorize all the stmts of the interleaving group when we
5473 reach the last stmt in the group. */
5474 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5475 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5484 grouped_store
= false;
5485 /* VEC_NUM is the number of vect stmts to be created for this
5487 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5488 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5489 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5490 op
= gimple_assign_rhs1 (first_stmt
);
5493 /* VEC_NUM is the number of vect stmts to be created for this
5495 vec_num
= group_size
;
5501 group_size
= vec_num
= 1;
5504 if (dump_enabled_p ())
5505 dump_printf_loc (MSG_NOTE
, vect_location
,
5506 "transform store. ncopies = %d\n", ncopies
);
5508 if (STMT_VINFO_STRIDED_P (stmt_info
))
5510 gimple_stmt_iterator incr_gsi
;
5516 gimple_seq stmts
= NULL
;
5517 tree stride_base
, stride_step
, alias_off
;
5521 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5524 = fold_build_pointer_plus
5525 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5526 size_binop (PLUS_EXPR
,
5527 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5528 convert_to_ptrofftype (DR_INIT(first_dr
))));
5529 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5531 /* For a store with loop-invariant (but other than power-of-2)
5532 stride (i.e. not a grouped access) like so:
5534 for (i = 0; i < n; i += stride)
5537 we generate a new induction variable and new stores from
5538 the components of the (vectorized) rhs:
5540 for (j = 0; ; j += VF*stride)
5545 array[j + stride] = tmp2;
5549 unsigned nstores
= nunits
;
5550 tree ltype
= elem_type
;
5553 nstores
= nunits
/ group_size
;
5554 if (group_size
< nunits
)
5555 ltype
= build_vector_type (elem_type
, group_size
);
5558 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5559 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5563 ivstep
= stride_step
;
5564 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5565 build_int_cst (TREE_TYPE (ivstep
),
5566 ncopies
* nstores
));
5568 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5570 create_iv (stride_base
, ivstep
, NULL
,
5571 loop
, &incr_gsi
, insert_after
,
5573 incr
= gsi_stmt (incr_gsi
);
5574 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5576 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5578 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5580 prev_stmt_info
= NULL
;
5581 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5582 next_stmt
= first_stmt
;
5583 for (g
= 0; g
< group_size
; g
++)
5585 running_off
= offvar
;
5588 tree size
= TYPE_SIZE_UNIT (ltype
);
5589 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5591 tree newoff
= copy_ssa_name (running_off
, NULL
);
5592 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5594 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5595 running_off
= newoff
;
5597 for (j
= 0; j
< ncopies
; j
++)
5599 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5600 and first_stmt == stmt. */
5605 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5607 vec_oprnd
= vec_oprnds
[0];
5611 gcc_assert (gimple_assign_single_p (next_stmt
));
5612 op
= gimple_assign_rhs1 (next_stmt
);
5613 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5620 vec_oprnd
= vec_oprnds
[j
];
5623 vect_is_simple_use (vec_oprnd
, NULL
, vinfo
,
5624 &def_stmt
, &def
, &dt
);
5625 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5629 for (i
= 0; i
< nstores
; i
++)
5631 tree newref
, newoff
;
5632 gimple
*incr
, *assign
;
5633 tree size
= TYPE_SIZE (ltype
);
5634 /* Extract the i'th component. */
5635 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5636 bitsize_int (i
), size
);
5637 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5640 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5644 newref
= build2 (MEM_REF
, ltype
,
5645 running_off
, alias_off
);
5647 /* And store it to *running_off. */
5648 assign
= gimple_build_assign (newref
, elem
);
5649 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5651 newoff
= copy_ssa_name (running_off
, NULL
);
5652 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5653 running_off
, stride_step
);
5654 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5656 running_off
= newoff
;
5657 if (g
== group_size
- 1
5660 if (j
== 0 && i
== 0)
5661 STMT_VINFO_VEC_STMT (stmt_info
)
5662 = *vec_stmt
= assign
;
5664 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5665 prev_stmt_info
= vinfo_for_stmt (assign
);
5669 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5674 dr_chain
.create (group_size
);
5675 oprnds
.create (group_size
);
5677 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5678 gcc_assert (alignment_support_scheme
);
5679 /* Targets with store-lane instructions must not require explicit
5681 gcc_assert (!store_lanes_p
5682 || alignment_support_scheme
== dr_aligned
5683 || alignment_support_scheme
== dr_unaligned_supported
);
5686 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5689 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5691 aggr_type
= vectype
;
5693 /* In case the vectorization factor (VF) is bigger than the number
5694 of elements that we can fit in a vectype (nunits), we have to generate
5695 more than one vector stmt - i.e - we need to "unroll" the
5696 vector stmt by a factor VF/nunits. For more details see documentation in
5697 vect_get_vec_def_for_copy_stmt. */
5699 /* In case of interleaving (non-unit grouped access):
5706 We create vectorized stores starting from base address (the access of the
5707 first stmt in the chain (S2 in the above example), when the last store stmt
5708 of the chain (S4) is reached:
5711 VS2: &base + vec_size*1 = vx0
5712 VS3: &base + vec_size*2 = vx1
5713 VS4: &base + vec_size*3 = vx3
5715 Then permutation statements are generated:
5717 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5718 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5721 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5722 (the order of the data-refs in the output of vect_permute_store_chain
5723 corresponds to the order of scalar stmts in the interleaving chain - see
5724 the documentation of vect_permute_store_chain()).
5726 In case of both multiple types and interleaving, above vector stores and
5727 permutation stmts are created for every copy. The result vector stmts are
5728 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5729 STMT_VINFO_RELATED_STMT for the next copies.
5732 prev_stmt_info
= NULL
;
5733 for (j
= 0; j
< ncopies
; j
++)
5740 /* Get vectorized arguments for SLP_NODE. */
5741 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5742 NULL
, slp_node
, -1);
5744 vec_oprnd
= vec_oprnds
[0];
5748 /* For interleaved stores we collect vectorized defs for all the
5749 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5750 used as an input to vect_permute_store_chain(), and OPRNDS as
5751 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5753 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5754 OPRNDS are of size 1. */
5755 next_stmt
= first_stmt
;
5756 for (i
= 0; i
< group_size
; i
++)
5758 /* Since gaps are not supported for interleaved stores,
5759 GROUP_SIZE is the exact number of stmts in the chain.
5760 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5761 there is no interleaving, GROUP_SIZE is 1, and only one
5762 iteration of the loop will be executed. */
5763 gcc_assert (next_stmt
5764 && gimple_assign_single_p (next_stmt
));
5765 op
= gimple_assign_rhs1 (next_stmt
);
5767 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5769 dr_chain
.quick_push (vec_oprnd
);
5770 oprnds
.quick_push (vec_oprnd
);
5771 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5775 /* We should have catched mismatched types earlier. */
5776 gcc_assert (useless_type_conversion_p (vectype
,
5777 TREE_TYPE (vec_oprnd
)));
5778 bool simd_lane_access_p
5779 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5780 if (simd_lane_access_p
5781 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5782 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5783 && integer_zerop (DR_OFFSET (first_dr
))
5784 && integer_zerop (DR_INIT (first_dr
))
5785 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5786 get_alias_set (DR_REF (first_dr
))))
5788 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5789 dataref_offset
= build_int_cst (reference_alias_ptr_type
5790 (DR_REF (first_dr
)), 0);
5795 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5796 simd_lane_access_p
? loop
: NULL
,
5797 offset
, &dummy
, gsi
, &ptr_incr
,
5798 simd_lane_access_p
, &inv_p
);
5799 gcc_assert (bb_vinfo
|| !inv_p
);
5803 /* For interleaved stores we created vectorized defs for all the
5804 defs stored in OPRNDS in the previous iteration (previous copy).
5805 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5806 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5808 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5809 OPRNDS are of size 1. */
5810 for (i
= 0; i
< group_size
; i
++)
5813 vect_is_simple_use (op
, NULL
, vinfo
, &def_stmt
,
5815 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5816 dr_chain
[i
] = vec_oprnd
;
5817 oprnds
[i
] = vec_oprnd
;
5821 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5822 TYPE_SIZE_UNIT (aggr_type
));
5824 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5825 TYPE_SIZE_UNIT (aggr_type
));
5832 /* Combine all the vectors into an array. */
5833 vec_array
= create_vector_array (vectype
, vec_num
);
5834 for (i
= 0; i
< vec_num
; i
++)
5836 vec_oprnd
= dr_chain
[i
];
5837 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5841 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5842 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5843 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5844 gimple_call_set_lhs (new_stmt
, data_ref
);
5845 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5853 result_chain
.create (group_size
);
5855 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5859 next_stmt
= first_stmt
;
5860 for (i
= 0; i
< vec_num
; i
++)
5862 unsigned align
, misalign
;
5865 /* Bump the vector pointer. */
5866 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5870 vec_oprnd
= vec_oprnds
[i
];
5871 else if (grouped_store
)
5872 /* For grouped stores vectorized defs are interleaved in
5873 vect_permute_store_chain(). */
5874 vec_oprnd
= result_chain
[i
];
5876 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
5880 : build_int_cst (reference_alias_ptr_type
5881 (DR_REF (first_dr
)), 0));
5882 align
= TYPE_ALIGN_UNIT (vectype
);
5883 if (aligned_access_p (first_dr
))
5885 else if (DR_MISALIGNMENT (first_dr
) == -1)
5887 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
5888 align
= TYPE_ALIGN_UNIT (elem_type
);
5890 align
= get_object_alignment (DR_REF (first_dr
))
5893 TREE_TYPE (data_ref
)
5894 = build_aligned_type (TREE_TYPE (data_ref
),
5895 align
* BITS_PER_UNIT
);
5899 TREE_TYPE (data_ref
)
5900 = build_aligned_type (TREE_TYPE (data_ref
),
5901 TYPE_ALIGN (elem_type
));
5902 misalign
= DR_MISALIGNMENT (first_dr
);
5904 if (dataref_offset
== NULL_TREE
5905 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
5906 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5910 && dt
!= vect_constant_def
5911 && dt
!= vect_external_def
)
5913 tree perm_mask
= perm_mask_for_reverse (vectype
);
5915 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5917 tree new_temp
= make_ssa_name (perm_dest
);
5919 /* Generate the permute statement. */
5921 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
5922 vec_oprnd
, perm_mask
);
5923 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5925 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5926 vec_oprnd
= new_temp
;
5929 /* Arguments are ready. Create the new vector stmt. */
5930 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5931 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5936 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5944 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5946 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5947 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5951 dr_chain
.release ();
5953 result_chain
.release ();
5954 vec_oprnds
.release ();
5959 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5960 VECTOR_CST mask. No checks are made that the target platform supports the
5961 mask, so callers may wish to test can_vec_perm_p separately, or use
5962 vect_gen_perm_mask_checked. */
5965 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
5967 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5970 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5972 mask_elt_type
= lang_hooks
.types
.type_for_mode
5973 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5974 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5976 mask_elts
= XALLOCAVEC (tree
, nunits
);
5977 for (i
= nunits
- 1; i
>= 0; i
--)
5978 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5979 mask_vec
= build_vector (mask_type
, mask_elts
);
5984 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5985 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5988 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
5990 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
5991 return vect_gen_perm_mask_any (vectype
, sel
);
5994 /* Given a vector variable X and Y, that was generated for the scalar
5995 STMT, generate instructions to permute the vector elements of X and Y
5996 using permutation mask MASK_VEC, insert them at *GSI and return the
5997 permuted vector variable. */
6000 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6001 gimple_stmt_iterator
*gsi
)
6003 tree vectype
= TREE_TYPE (x
);
6004 tree perm_dest
, data_ref
;
6007 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6008 data_ref
= make_ssa_name (perm_dest
);
6010 /* Generate the permute statement. */
6011 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6012 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6017 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6018 inserting them on the loops preheader edge. Returns true if we
6019 were successful in doing so (and thus STMT can be moved then),
6020 otherwise returns false. */
6023 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6029 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6031 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6032 if (!gimple_nop_p (def_stmt
)
6033 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6035 /* Make sure we don't need to recurse. While we could do
6036 so in simple cases when there are more complex use webs
6037 we don't have an easy way to preserve stmt order to fulfil
6038 dependencies within them. */
6041 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6043 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6045 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6046 if (!gimple_nop_p (def_stmt2
)
6047 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6057 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6059 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6060 if (!gimple_nop_p (def_stmt
)
6061 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6063 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6064 gsi_remove (&gsi
, false);
6065 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6072 /* vectorizable_load.
6074 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6076 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6077 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6078 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6081 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6082 slp_tree slp_node
, slp_instance slp_node_instance
)
6085 tree vec_dest
= NULL
;
6086 tree data_ref
= NULL
;
6087 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6088 stmt_vec_info prev_stmt_info
;
6089 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6090 struct loop
*loop
= NULL
;
6091 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6092 bool nested_in_vect_loop
= false;
6093 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6097 gimple
*new_stmt
= NULL
;
6099 enum dr_alignment_support alignment_support_scheme
;
6100 tree dataref_ptr
= NULL_TREE
;
6101 tree dataref_offset
= NULL_TREE
;
6102 gimple
*ptr_incr
= NULL
;
6104 int i
, j
, group_size
= -1, group_gap_adj
;
6105 tree msq
= NULL_TREE
, lsq
;
6106 tree offset
= NULL_TREE
;
6107 tree byte_offset
= NULL_TREE
;
6108 tree realignment_token
= NULL_TREE
;
6110 vec
<tree
> dr_chain
= vNULL
;
6111 bool grouped_load
= false;
6112 bool load_lanes_p
= false;
6115 bool negative
= false;
6116 bool compute_in_loop
= false;
6117 struct loop
*at_loop
;
6119 bool slp
= (slp_node
!= NULL
);
6120 bool slp_perm
= false;
6121 enum tree_code code
;
6122 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6125 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
6126 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
6127 int gather_scale
= 1;
6128 enum vect_def_type gather_dt
= vect_unknown_def_type
;
6129 vec_info
*vinfo
= stmt_info
->vinfo
;
6131 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6134 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
6137 /* Is vectorizable load? */
6138 if (!is_gimple_assign (stmt
))
6141 scalar_dest
= gimple_assign_lhs (stmt
);
6142 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6145 code
= gimple_assign_rhs_code (stmt
);
6146 if (code
!= ARRAY_REF
6147 && code
!= BIT_FIELD_REF
6148 && code
!= INDIRECT_REF
6149 && code
!= COMPONENT_REF
6150 && code
!= IMAGPART_EXPR
6151 && code
!= REALPART_EXPR
6153 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6156 if (!STMT_VINFO_DATA_REF (stmt_info
))
6159 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6160 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6164 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6165 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6166 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6171 /* Multiple types in SLP are handled by creating the appropriate number of
6172 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6174 if (slp
|| PURE_SLP_STMT (stmt_info
))
6177 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6179 gcc_assert (ncopies
>= 1);
6181 /* FORNOW. This restriction should be relaxed. */
6182 if (nested_in_vect_loop
&& ncopies
> 1)
6184 if (dump_enabled_p ())
6185 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6186 "multiple types in nested loop.\n");
6190 /* Invalidate assumptions made by dependence analysis when vectorization
6191 on the unrolled body effectively re-orders stmts. */
6193 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6194 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6195 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6199 "cannot perform implicit CSE when unrolling "
6200 "with negative dependence distance\n");
6204 elem_type
= TREE_TYPE (vectype
);
6205 mode
= TYPE_MODE (vectype
);
6207 /* FORNOW. In some cases can vectorize even if data-type not supported
6208 (e.g. - data copies). */
6209 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6211 if (dump_enabled_p ())
6212 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6213 "Aligned load, but unsupported type.\n");
6217 /* Check if the load is a part of an interleaving chain. */
6218 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6220 grouped_load
= true;
6222 gcc_assert (!nested_in_vect_loop
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6224 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6226 /* If this is single-element interleaving with an element distance
6227 that leaves unused vector loads around punt - we at least create
6228 very sub-optimal code in that case (and blow up memory,
6230 if (first_stmt
== stmt
6231 && !GROUP_NEXT_ELEMENT (stmt_info
)
6232 && GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
6234 if (dump_enabled_p ())
6235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6236 "single-element interleaving not supported "
6237 "for not adjacent vector loads\n");
6241 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6244 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6246 && !PURE_SLP_STMT (stmt_info
)
6247 && !STMT_VINFO_STRIDED_P (stmt_info
))
6249 if (vect_load_lanes_supported (vectype
, group_size
))
6250 load_lanes_p
= true;
6251 else if (!vect_grouped_load_supported (vectype
, group_size
))
6255 /* Invalidate assumptions made by dependence analysis when vectorization
6256 on the unrolled body effectively re-orders stmts. */
6257 if (!PURE_SLP_STMT (stmt_info
)
6258 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6259 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6260 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6264 "cannot perform implicit CSE when performing "
6265 "group loads with negative dependence distance\n");
6269 /* Similarly when the stmt is a load that is both part of a SLP
6270 instance and a loop vectorized stmt via the same-dr mechanism
6271 we have to give up. */
6272 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6273 && (STMT_SLP_TYPE (stmt_info
)
6274 != STMT_SLP_TYPE (vinfo_for_stmt
6275 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6277 if (dump_enabled_p ())
6278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6279 "conflicting SLP types for CSEd load\n");
6285 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6289 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
6290 &gather_off
, &gather_scale
);
6291 gcc_assert (gather_decl
);
6292 if (!vect_is_simple_use_1 (gather_off
, NULL
, vinfo
,
6293 &def_stmt
, &def
, &gather_dt
,
6294 &gather_off_vectype
))
6296 if (dump_enabled_p ())
6297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6298 "gather index use not simple.\n");
6302 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6305 && (slp
|| PURE_SLP_STMT (stmt_info
)))
6306 && (group_size
> nunits
6307 || nunits
% group_size
!= 0))
6309 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6310 "unhandled strided group load\n");
6316 negative
= tree_int_cst_compare (nested_in_vect_loop
6317 ? STMT_VINFO_DR_STEP (stmt_info
)
6319 size_zero_node
) < 0;
6320 if (negative
&& ncopies
> 1)
6322 if (dump_enabled_p ())
6323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6324 "multiple types with negative step.\n");
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6334 "negative step for group load not supported"
6338 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6339 if (alignment_support_scheme
!= dr_aligned
6340 && alignment_support_scheme
!= dr_unaligned_supported
)
6342 if (dump_enabled_p ())
6343 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6344 "negative step but alignment required.\n");
6347 if (!perm_mask_for_reverse (vectype
))
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6351 "negative step and reversing not supported."
6358 if (!vec_stmt
) /* transformation not required. */
6360 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6361 /* The SLP costs are calculated during SLP analysis. */
6362 if (!PURE_SLP_STMT (stmt_info
))
6363 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6368 if (dump_enabled_p ())
6369 dump_printf_loc (MSG_NOTE
, vect_location
,
6370 "transform load. ncopies = %d\n", ncopies
);
6374 ensure_base_align (stmt_info
, dr
);
6376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6378 tree vec_oprnd0
= NULL_TREE
, op
;
6379 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6380 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6381 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6382 edge pe
= loop_preheader_edge (loop
);
6385 enum { NARROW
, NONE
, WIDEN
} modifier
;
6386 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6388 if (nunits
== gather_off_nunits
)
6390 else if (nunits
== gather_off_nunits
/ 2)
6392 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6395 for (i
= 0; i
< gather_off_nunits
; ++i
)
6396 sel
[i
] = i
| nunits
;
6398 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6400 else if (nunits
== gather_off_nunits
* 2)
6402 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6405 for (i
= 0; i
< nunits
; ++i
)
6406 sel
[i
] = i
< gather_off_nunits
6407 ? i
: i
+ nunits
- gather_off_nunits
;
6409 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6415 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6416 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6417 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6418 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6419 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6420 scaletype
= TREE_VALUE (arglist
);
6421 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6423 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6425 ptr
= fold_convert (ptrtype
, gather_base
);
6426 if (!is_gimple_min_invariant (ptr
))
6428 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6429 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6430 gcc_assert (!new_bb
);
6433 /* Currently we support only unconditional gather loads,
6434 so mask should be all ones. */
6435 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6436 mask
= build_int_cst (masktype
, -1);
6437 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6439 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6440 mask
= build_vector_from_val (masktype
, mask
);
6441 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6443 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6447 for (j
= 0; j
< 6; ++j
)
6449 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6450 mask
= build_real (TREE_TYPE (masktype
), r
);
6451 mask
= build_vector_from_val (masktype
, mask
);
6452 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6457 scale
= build_int_cst (scaletype
, gather_scale
);
6459 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6460 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6465 for (j
= 0; j
< 6; ++j
)
6467 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6468 merge
= build_real (TREE_TYPE (rettype
), r
);
6472 merge
= build_vector_from_val (rettype
, merge
);
6473 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6475 prev_stmt_info
= NULL
;
6476 for (j
= 0; j
< ncopies
; ++j
)
6478 if (modifier
== WIDEN
&& (j
& 1))
6479 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6480 perm_mask
, stmt
, gsi
);
6483 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
6486 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6488 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6490 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6491 == TYPE_VECTOR_SUBPARTS (idxtype
));
6492 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
6493 var
= make_ssa_name (var
);
6494 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6496 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6497 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6502 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6504 if (!useless_type_conversion_p (vectype
, rettype
))
6506 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6507 == TYPE_VECTOR_SUBPARTS (rettype
));
6508 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6509 op
= make_ssa_name (var
, new_stmt
);
6510 gimple_call_set_lhs (new_stmt
, op
);
6511 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6512 var
= make_ssa_name (vec_dest
);
6513 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6515 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6519 var
= make_ssa_name (vec_dest
, new_stmt
);
6520 gimple_call_set_lhs (new_stmt
, var
);
6523 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6525 if (modifier
== NARROW
)
6532 var
= permute_vec_elements (prev_res
, var
,
6533 perm_mask
, stmt
, gsi
);
6534 new_stmt
= SSA_NAME_DEF_STMT (var
);
6537 if (prev_stmt_info
== NULL
)
6538 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6540 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6541 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6545 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6547 gimple_stmt_iterator incr_gsi
;
6553 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6554 gimple_seq stmts
= NULL
;
6555 tree stride_base
, stride_step
, alias_off
;
6557 gcc_assert (!nested_in_vect_loop
);
6559 if (slp
&& grouped_load
)
6560 first_dr
= STMT_VINFO_DATA_REF
6561 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6566 = fold_build_pointer_plus
6567 (DR_BASE_ADDRESS (first_dr
),
6568 size_binop (PLUS_EXPR
,
6569 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6570 convert_to_ptrofftype (DR_INIT (first_dr
))));
6571 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6573 /* For a load with loop-invariant (but other than power-of-2)
6574 stride (i.e. not a grouped access) like so:
6576 for (i = 0; i < n; i += stride)
6579 we generate a new induction variable and new accesses to
6580 form a new vector (or vectors, depending on ncopies):
6582 for (j = 0; ; j += VF*stride)
6584 tmp2 = array[j + stride];
6586 vectemp = {tmp1, tmp2, ...}
6589 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6590 build_int_cst (TREE_TYPE (stride_step
), vf
));
6592 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6594 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6595 loop
, &incr_gsi
, insert_after
,
6597 incr
= gsi_stmt (incr_gsi
);
6598 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6600 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6601 &stmts
, true, NULL_TREE
);
6603 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6605 prev_stmt_info
= NULL
;
6606 running_off
= offvar
;
6607 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6608 int nloads
= nunits
;
6609 tree ltype
= TREE_TYPE (vectype
);
6610 auto_vec
<tree
> dr_chain
;
6613 nloads
= nunits
/ group_size
;
6614 if (group_size
< nunits
)
6615 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6618 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6619 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6621 dr_chain
.create (ncopies
);
6623 for (j
= 0; j
< ncopies
; j
++)
6629 vec_alloc (v
, nloads
);
6630 for (i
= 0; i
< nloads
; i
++)
6632 tree newref
, newoff
;
6634 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6636 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6639 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6640 newoff
= copy_ssa_name (running_off
);
6641 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6642 running_off
, stride_step
);
6643 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6645 running_off
= newoff
;
6648 vec_inv
= build_constructor (vectype
, v
);
6649 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6650 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6654 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6655 build2 (MEM_REF
, ltype
,
6656 running_off
, alias_off
));
6657 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6659 tree newoff
= copy_ssa_name (running_off
);
6660 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6661 running_off
, stride_step
);
6662 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6664 running_off
= newoff
;
6669 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6671 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6676 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6678 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6679 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6683 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6684 slp_node_instance
, false);
6690 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6692 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6693 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6694 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6696 /* Check if the chain of loads is already vectorized. */
6697 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6698 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6699 ??? But we can only do so if there is exactly one
6700 as we have no way to get at the rest. Leave the CSE
6702 ??? With the group load eventually participating
6703 in multiple different permutations (having multiple
6704 slp nodes which refer to the same group) the CSE
6705 is even wrong code. See PR56270. */
6708 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6711 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6712 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6715 /* VEC_NUM is the number of vect stmts to be created for this group. */
6718 grouped_load
= false;
6719 /* For SLP permutation support we need to load the whole group,
6720 not only the number of vector stmts the permutation result
6723 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6725 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6726 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6729 vec_num
= group_size
;
6735 group_size
= vec_num
= 1;
6739 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6740 gcc_assert (alignment_support_scheme
);
6741 /* Targets with load-lane instructions must not require explicit
6743 gcc_assert (!load_lanes_p
6744 || alignment_support_scheme
== dr_aligned
6745 || alignment_support_scheme
== dr_unaligned_supported
);
6747 /* In case the vectorization factor (VF) is bigger than the number
6748 of elements that we can fit in a vectype (nunits), we have to generate
6749 more than one vector stmt - i.e - we need to "unroll" the
6750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6751 from one copy of the vector stmt to the next, in the field
6752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6753 stages to find the correct vector defs to be used when vectorizing
6754 stmts that use the defs of the current stmt. The example below
6755 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6756 need to create 4 vectorized stmts):
6758 before vectorization:
6759 RELATED_STMT VEC_STMT
6763 step 1: vectorize stmt S1:
6764 We first create the vector stmt VS1_0, and, as usual, record a
6765 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6766 Next, we create the vector stmt VS1_1, and record a pointer to
6767 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6768 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6770 RELATED_STMT VEC_STMT
6771 VS1_0: vx0 = memref0 VS1_1 -
6772 VS1_1: vx1 = memref1 VS1_2 -
6773 VS1_2: vx2 = memref2 VS1_3 -
6774 VS1_3: vx3 = memref3 - -
6775 S1: x = load - VS1_0
6778 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6779 information we recorded in RELATED_STMT field is used to vectorize
6782 /* In case of interleaving (non-unit grouped access):
6789 Vectorized loads are created in the order of memory accesses
6790 starting from the access of the first stmt of the chain:
6793 VS2: vx1 = &base + vec_size*1
6794 VS3: vx3 = &base + vec_size*2
6795 VS4: vx4 = &base + vec_size*3
6797 Then permutation statements are generated:
6799 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6800 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6803 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6804 (the order of the data-refs in the output of vect_permute_load_chain
6805 corresponds to the order of scalar stmts in the interleaving chain - see
6806 the documentation of vect_permute_load_chain()).
6807 The generation of permutation stmts and recording them in
6808 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6810 In case of both multiple types and interleaving, the vector loads and
6811 permutation stmts above are created for every copy. The result vector
6812 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6813 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6815 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6816 on a target that supports unaligned accesses (dr_unaligned_supported)
6817 we generate the following code:
6821 p = p + indx * vectype_size;
6826 Otherwise, the data reference is potentially unaligned on a target that
6827 does not support unaligned accesses (dr_explicit_realign_optimized) -
6828 then generate the following code, in which the data in each iteration is
6829 obtained by two vector loads, one from the previous iteration, and one
6830 from the current iteration:
6832 msq_init = *(floor(p1))
6833 p2 = initial_addr + VS - 1;
6834 realignment_token = call target_builtin;
6837 p2 = p2 + indx * vectype_size
6839 vec_dest = realign_load (msq, lsq, realignment_token)
6844 /* If the misalignment remains the same throughout the execution of the
6845 loop, we can create the init_addr and permutation mask at the loop
6846 preheader. Otherwise, it needs to be created inside the loop.
6847 This can only occur when vectorizing memory accesses in the inner-loop
6848 nested within an outer-loop that is being vectorized. */
6850 if (nested_in_vect_loop
6851 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6852 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6854 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6855 compute_in_loop
= true;
6858 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6859 || alignment_support_scheme
== dr_explicit_realign
)
6860 && !compute_in_loop
)
6862 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6863 alignment_support_scheme
, NULL_TREE
,
6865 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6867 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6868 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6876 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6879 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6881 aggr_type
= vectype
;
6883 prev_stmt_info
= NULL
;
6884 for (j
= 0; j
< ncopies
; j
++)
6886 /* 1. Create the vector or array pointer update chain. */
6889 bool simd_lane_access_p
6890 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6891 if (simd_lane_access_p
6892 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6893 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6894 && integer_zerop (DR_OFFSET (first_dr
))
6895 && integer_zerop (DR_INIT (first_dr
))
6896 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6897 get_alias_set (DR_REF (first_dr
)))
6898 && (alignment_support_scheme
== dr_aligned
6899 || alignment_support_scheme
== dr_unaligned_supported
))
6901 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6902 dataref_offset
= build_int_cst (reference_alias_ptr_type
6903 (DR_REF (first_dr
)), 0);
6908 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6909 offset
, &dummy
, gsi
, &ptr_incr
,
6910 simd_lane_access_p
, &inv_p
,
6913 else if (dataref_offset
)
6914 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6915 TYPE_SIZE_UNIT (aggr_type
));
6917 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6918 TYPE_SIZE_UNIT (aggr_type
));
6920 if (grouped_load
|| slp_perm
)
6921 dr_chain
.create (vec_num
);
6927 vec_array
= create_vector_array (vectype
, vec_num
);
6930 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6931 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6932 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6933 gimple_call_set_lhs (new_stmt
, vec_array
);
6934 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6936 /* Extract each vector into an SSA_NAME. */
6937 for (i
= 0; i
< vec_num
; i
++)
6939 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6941 dr_chain
.quick_push (new_temp
);
6944 /* Record the mapping between SSA_NAMEs and statements. */
6945 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6949 for (i
= 0; i
< vec_num
; i
++)
6952 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6955 /* 2. Create the vector-load in the loop. */
6956 switch (alignment_support_scheme
)
6959 case dr_unaligned_supported
:
6961 unsigned int align
, misalign
;
6964 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
6967 : build_int_cst (reference_alias_ptr_type
6968 (DR_REF (first_dr
)), 0));
6969 align
= TYPE_ALIGN_UNIT (vectype
);
6970 if (alignment_support_scheme
== dr_aligned
)
6972 gcc_assert (aligned_access_p (first_dr
));
6975 else if (DR_MISALIGNMENT (first_dr
) == -1)
6977 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6978 align
= TYPE_ALIGN_UNIT (elem_type
);
6980 align
= (get_object_alignment (DR_REF (first_dr
))
6983 TREE_TYPE (data_ref
)
6984 = build_aligned_type (TREE_TYPE (data_ref
),
6985 align
* BITS_PER_UNIT
);
6989 TREE_TYPE (data_ref
)
6990 = build_aligned_type (TREE_TYPE (data_ref
),
6991 TYPE_ALIGN (elem_type
));
6992 misalign
= DR_MISALIGNMENT (first_dr
);
6994 if (dataref_offset
== NULL_TREE
6995 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6996 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7000 case dr_explicit_realign
:
7004 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7006 if (compute_in_loop
)
7007 msq
= vect_setup_realignment (first_stmt
, gsi
,
7009 dr_explicit_realign
,
7012 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7013 ptr
= copy_ssa_name (dataref_ptr
);
7015 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7016 new_stmt
= gimple_build_assign
7017 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7019 (TREE_TYPE (dataref_ptr
),
7020 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7021 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7023 = build2 (MEM_REF
, vectype
, ptr
,
7024 build_int_cst (reference_alias_ptr_type
7025 (DR_REF (first_dr
)), 0));
7026 vec_dest
= vect_create_destination_var (scalar_dest
,
7028 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7029 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7030 gimple_assign_set_lhs (new_stmt
, new_temp
);
7031 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7032 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7033 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7036 bump
= size_binop (MULT_EXPR
, vs
,
7037 TYPE_SIZE_UNIT (elem_type
));
7038 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7039 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7040 new_stmt
= gimple_build_assign
7041 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7044 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7045 ptr
= copy_ssa_name (ptr
, new_stmt
);
7046 gimple_assign_set_lhs (new_stmt
, ptr
);
7047 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7049 = build2 (MEM_REF
, vectype
, ptr
,
7050 build_int_cst (reference_alias_ptr_type
7051 (DR_REF (first_dr
)), 0));
7054 case dr_explicit_realign_optimized
:
7055 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7056 new_temp
= copy_ssa_name (dataref_ptr
);
7058 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7059 new_stmt
= gimple_build_assign
7060 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7062 (TREE_TYPE (dataref_ptr
),
7063 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7064 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7066 = build2 (MEM_REF
, vectype
, new_temp
,
7067 build_int_cst (reference_alias_ptr_type
7068 (DR_REF (first_dr
)), 0));
7073 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7074 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7075 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7076 gimple_assign_set_lhs (new_stmt
, new_temp
);
7077 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7079 /* 3. Handle explicit realignment if necessary/supported.
7081 vec_dest = realign_load (msq, lsq, realignment_token) */
7082 if (alignment_support_scheme
== dr_explicit_realign_optimized
7083 || alignment_support_scheme
== dr_explicit_realign
)
7085 lsq
= gimple_assign_lhs (new_stmt
);
7086 if (!realignment_token
)
7087 realignment_token
= dataref_ptr
;
7088 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7089 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7090 msq
, lsq
, realignment_token
);
7091 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7092 gimple_assign_set_lhs (new_stmt
, new_temp
);
7093 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7095 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7098 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7099 add_phi_arg (phi
, lsq
,
7100 loop_latch_edge (containing_loop
),
7106 /* 4. Handle invariant-load. */
7107 if (inv_p
&& !bb_vinfo
)
7109 gcc_assert (!grouped_load
);
7110 /* If we have versioned for aliasing or the loop doesn't
7111 have any data dependencies that would preclude this,
7112 then we are sure this is a loop invariant load and
7113 thus we can insert it on the preheader edge. */
7114 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7115 && !nested_in_vect_loop
7116 && hoist_defs_of_uses (stmt
, loop
))
7118 if (dump_enabled_p ())
7120 dump_printf_loc (MSG_NOTE
, vect_location
,
7121 "hoisting out of the vectorized "
7123 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7125 tree tem
= copy_ssa_name (scalar_dest
);
7126 gsi_insert_on_edge_immediate
7127 (loop_preheader_edge (loop
),
7128 gimple_build_assign (tem
,
7130 (gimple_assign_rhs1 (stmt
))));
7131 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7135 gimple_stmt_iterator gsi2
= *gsi
;
7137 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7140 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7141 set_vinfo_for_stmt (new_stmt
,
7142 new_stmt_vec_info (new_stmt
, vinfo
));
7147 tree perm_mask
= perm_mask_for_reverse (vectype
);
7148 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7149 perm_mask
, stmt
, gsi
);
7150 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7153 /* Collect vector loads and later create their permutation in
7154 vect_transform_grouped_load (). */
7155 if (grouped_load
|| slp_perm
)
7156 dr_chain
.quick_push (new_temp
);
7158 /* Store vector loads in the corresponding SLP_NODE. */
7159 if (slp
&& !slp_perm
)
7160 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7162 /* Bump the vector pointer to account for a gap or for excess
7163 elements loaded for a permuted SLP load. */
7164 if (group_gap_adj
!= 0)
7168 = wide_int_to_tree (sizetype
,
7169 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7170 group_gap_adj
, &ovf
));
7171 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7176 if (slp
&& !slp_perm
)
7181 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7182 slp_node_instance
, false))
7184 dr_chain
.release ();
7193 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7194 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7199 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7201 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7202 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7205 dr_chain
.release ();
7211 /* Function vect_is_simple_cond.
7214 LOOP - the loop that is being vectorized.
7215 COND - Condition that is checked for simple use.
7218 *COMP_VECTYPE - the vector type for the comparison.
7220 Returns whether a COND can be vectorized. Checks whether
7221 condition operands are supportable using vec_is_simple_use. */
7224 vect_is_simple_cond (tree cond
, gimple
*stmt
, vec_info
*vinfo
,
7229 enum vect_def_type dt
;
7230 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7232 if (!COMPARISON_CLASS_P (cond
))
7235 lhs
= TREE_OPERAND (cond
, 0);
7236 rhs
= TREE_OPERAND (cond
, 1);
7238 if (TREE_CODE (lhs
) == SSA_NAME
)
7240 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7241 if (!vect_is_simple_use_1 (lhs
, stmt
, vinfo
,
7242 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
7245 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7246 && TREE_CODE (lhs
) != FIXED_CST
)
7249 if (TREE_CODE (rhs
) == SSA_NAME
)
7251 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7252 if (!vect_is_simple_use_1 (rhs
, stmt
, vinfo
,
7253 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
7256 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7257 && TREE_CODE (rhs
) != FIXED_CST
)
7260 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7264 /* vectorizable_condition.
7266 Check if STMT is conditional modify expression that can be vectorized.
7267 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7268 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7271 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7272 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7273 else clause if it is 2).
7275 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7278 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7279 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7282 tree scalar_dest
= NULL_TREE
;
7283 tree vec_dest
= NULL_TREE
;
7284 tree cond_expr
, then_clause
, else_clause
;
7285 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7286 tree comp_vectype
= NULL_TREE
;
7287 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7288 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7289 tree vec_compare
, vec_cond_expr
;
7291 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7293 enum vect_def_type dt
, dts
[4];
7295 enum tree_code code
;
7296 stmt_vec_info prev_stmt_info
= NULL
;
7298 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7299 vec
<tree
> vec_oprnds0
= vNULL
;
7300 vec
<tree
> vec_oprnds1
= vNULL
;
7301 vec
<tree
> vec_oprnds2
= vNULL
;
7302 vec
<tree
> vec_oprnds3
= vNULL
;
7305 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7308 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7311 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7312 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7316 /* FORNOW: not yet supported. */
7317 if (STMT_VINFO_LIVE_P (stmt_info
))
7319 if (dump_enabled_p ())
7320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7321 "value used after loop.\n");
7325 /* Is vectorizable conditional operation? */
7326 if (!is_gimple_assign (stmt
))
7329 code
= gimple_assign_rhs_code (stmt
);
7331 if (code
!= COND_EXPR
)
7334 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7335 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7337 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
7340 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7342 gcc_assert (ncopies
>= 1);
7343 if (reduc_index
&& ncopies
> 1)
7344 return false; /* FORNOW */
7346 cond_expr
= gimple_assign_rhs1 (stmt
);
7347 then_clause
= gimple_assign_rhs2 (stmt
);
7348 else_clause
= gimple_assign_rhs3 (stmt
);
7350 if (!vect_is_simple_cond (cond_expr
, stmt
, stmt_info
->vinfo
, &comp_vectype
)
7354 if (TREE_CODE (then_clause
) == SSA_NAME
)
7356 gimple
*then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
7357 if (!vect_is_simple_use (then_clause
, stmt
, stmt_info
->vinfo
,
7358 &then_def_stmt
, &def
, &dt
))
7361 else if (TREE_CODE (then_clause
) != INTEGER_CST
7362 && TREE_CODE (then_clause
) != REAL_CST
7363 && TREE_CODE (then_clause
) != FIXED_CST
)
7366 if (TREE_CODE (else_clause
) == SSA_NAME
)
7368 gimple
*else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
7369 if (!vect_is_simple_use (else_clause
, stmt
, stmt_info
->vinfo
,
7370 &else_def_stmt
, &def
, &dt
))
7373 else if (TREE_CODE (else_clause
) != INTEGER_CST
7374 && TREE_CODE (else_clause
) != REAL_CST
7375 && TREE_CODE (else_clause
) != FIXED_CST
)
7378 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
7379 /* The result of a vector comparison should be signed type. */
7380 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
7381 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
7382 if (vec_cmp_type
== NULL_TREE
)
7387 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7388 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7395 vec_oprnds0
.create (1);
7396 vec_oprnds1
.create (1);
7397 vec_oprnds2
.create (1);
7398 vec_oprnds3
.create (1);
7402 scalar_dest
= gimple_assign_lhs (stmt
);
7403 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7405 /* Handle cond expr. */
7406 for (j
= 0; j
< ncopies
; j
++)
7408 gassign
*new_stmt
= NULL
;
7413 auto_vec
<tree
, 4> ops
;
7414 auto_vec
<vec
<tree
>, 4> vec_defs
;
7416 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7417 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7418 ops
.safe_push (then_clause
);
7419 ops
.safe_push (else_clause
);
7420 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7421 vec_oprnds3
= vec_defs
.pop ();
7422 vec_oprnds2
= vec_defs
.pop ();
7423 vec_oprnds1
= vec_defs
.pop ();
7424 vec_oprnds0
= vec_defs
.pop ();
7427 vec_defs
.release ();
7433 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7435 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
7436 loop_vinfo
, >emp
, &def
, &dts
[0]);
7439 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7441 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
7442 loop_vinfo
, >emp
, &def
, &dts
[1]);
7443 if (reduc_index
== 1)
7444 vec_then_clause
= reduc_def
;
7447 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7449 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
7450 >emp
, &def
, &dts
[2]);
7452 if (reduc_index
== 2)
7453 vec_else_clause
= reduc_def
;
7456 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7458 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
7459 >emp
, &def
, &dts
[3]);
7465 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
7466 vec_oprnds0
.pop ());
7467 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
7468 vec_oprnds1
.pop ());
7469 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7470 vec_oprnds2
.pop ());
7471 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7472 vec_oprnds3
.pop ());
7477 vec_oprnds0
.quick_push (vec_cond_lhs
);
7478 vec_oprnds1
.quick_push (vec_cond_rhs
);
7479 vec_oprnds2
.quick_push (vec_then_clause
);
7480 vec_oprnds3
.quick_push (vec_else_clause
);
7483 /* Arguments are ready. Create the new vector stmt. */
7484 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7486 vec_cond_rhs
= vec_oprnds1
[i
];
7487 vec_then_clause
= vec_oprnds2
[i
];
7488 vec_else_clause
= vec_oprnds3
[i
];
7490 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7491 vec_cond_lhs
, vec_cond_rhs
);
7492 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7493 vec_compare
, vec_then_clause
, vec_else_clause
);
7495 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7496 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7497 gimple_assign_set_lhs (new_stmt
, new_temp
);
7498 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7500 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7507 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7509 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7511 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7514 vec_oprnds0
.release ();
7515 vec_oprnds1
.release ();
7516 vec_oprnds2
.release ();
7517 vec_oprnds3
.release ();
7523 /* Make sure the statement is vectorizable. */
7526 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
7528 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7529 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7530 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7532 tree scalar_type
, vectype
;
7533 gimple
*pattern_stmt
;
7534 gimple_seq pattern_def_seq
;
7536 if (dump_enabled_p ())
7538 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7539 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7542 if (gimple_has_volatile_ops (stmt
))
7544 if (dump_enabled_p ())
7545 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7546 "not vectorized: stmt has volatile operands\n");
7551 /* Skip stmts that do not need to be vectorized. In loops this is expected
7553 - the COND_EXPR which is the loop exit condition
7554 - any LABEL_EXPRs in the loop
7555 - computations that are used only for array indexing or loop control.
7556 In basic blocks we only analyze statements that are a part of some SLP
7557 instance, therefore, all the statements are relevant.
7559 Pattern statement needs to be analyzed instead of the original statement
7560 if the original statement is not relevant. Otherwise, we analyze both
7561 statements. In basic blocks we are called from some SLP instance
7562 traversal, don't analyze pattern stmts instead, the pattern stmts
7563 already will be part of SLP instance. */
7565 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7566 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7567 && !STMT_VINFO_LIVE_P (stmt_info
))
7569 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7571 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7572 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7574 /* Analyze PATTERN_STMT instead of the original stmt. */
7575 stmt
= pattern_stmt
;
7576 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7577 if (dump_enabled_p ())
7579 dump_printf_loc (MSG_NOTE
, vect_location
,
7580 "==> examining pattern statement: ");
7581 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7586 if (dump_enabled_p ())
7587 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7592 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7595 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7596 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7598 /* Analyze PATTERN_STMT too. */
7599 if (dump_enabled_p ())
7601 dump_printf_loc (MSG_NOTE
, vect_location
,
7602 "==> examining pattern statement: ");
7603 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7606 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7610 if (is_pattern_stmt_p (stmt_info
)
7612 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7614 gimple_stmt_iterator si
;
7616 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7618 gimple
*pattern_def_stmt
= gsi_stmt (si
);
7619 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7620 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7622 /* Analyze def stmt of STMT if it's a pattern stmt. */
7623 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_NOTE
, vect_location
,
7626 "==> examining pattern def statement: ");
7627 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7630 if (!vect_analyze_stmt (pattern_def_stmt
,
7631 need_to_vectorize
, node
))
7637 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7639 case vect_internal_def
:
7642 case vect_reduction_def
:
7643 case vect_nested_cycle
:
7644 gcc_assert (!bb_vinfo
7645 && (relevance
== vect_used_in_outer
7646 || relevance
== vect_used_in_outer_by_reduction
7647 || relevance
== vect_used_by_reduction
7648 || relevance
== vect_unused_in_scope
));
7651 case vect_induction_def
:
7652 case vect_constant_def
:
7653 case vect_external_def
:
7654 case vect_unknown_def_type
:
7661 gcc_assert (PURE_SLP_STMT (stmt_info
));
7663 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7664 if (dump_enabled_p ())
7666 dump_printf_loc (MSG_NOTE
, vect_location
,
7667 "get vectype for scalar type: ");
7668 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7669 dump_printf (MSG_NOTE
, "\n");
7672 vectype
= get_vectype_for_scalar_type (scalar_type
);
7675 if (dump_enabled_p ())
7677 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7678 "not SLPed: unsupported data-type ");
7679 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7681 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7686 if (dump_enabled_p ())
7688 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7689 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7690 dump_printf (MSG_NOTE
, "\n");
7693 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7696 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7698 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7699 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7700 || (is_gimple_call (stmt
)
7701 && gimple_call_lhs (stmt
) == NULL_TREE
));
7702 *need_to_vectorize
= true;
7705 if (PURE_SLP_STMT (stmt_info
) && !node
)
7707 dump_printf_loc (MSG_NOTE
, vect_location
,
7708 "handled only by SLP analysis\n");
7714 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7715 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7716 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7717 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7718 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7719 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7720 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7721 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7722 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7723 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7724 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
7725 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7729 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7730 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7731 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7732 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7733 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7734 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7735 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7736 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7737 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7742 if (dump_enabled_p ())
7744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7745 "not vectorized: relevant stmt not ");
7746 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7747 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7756 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7757 need extra handling, except for vectorizable reductions. */
7758 if (STMT_VINFO_LIVE_P (stmt_info
)
7759 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7760 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7764 if (dump_enabled_p ())
7766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7767 "not vectorized: live stmt not ");
7768 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7769 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7779 /* Function vect_transform_stmt.
7781 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7784 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7785 bool *grouped_store
, slp_tree slp_node
,
7786 slp_instance slp_node_instance
)
7788 bool is_store
= false;
7789 gimple
*vec_stmt
= NULL
;
7790 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7793 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7795 switch (STMT_VINFO_TYPE (stmt_info
))
7797 case type_demotion_vec_info_type
:
7798 case type_promotion_vec_info_type
:
7799 case type_conversion_vec_info_type
:
7800 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7804 case induc_vec_info_type
:
7805 gcc_assert (!slp_node
);
7806 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7810 case shift_vec_info_type
:
7811 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7815 case op_vec_info_type
:
7816 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7820 case assignment_vec_info_type
:
7821 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7825 case load_vec_info_type
:
7826 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7831 case store_vec_info_type
:
7832 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7834 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7836 /* In case of interleaving, the whole chain is vectorized when the
7837 last store in the chain is reached. Store stmts before the last
7838 one are skipped, and there vec_stmt_info shouldn't be freed
7840 *grouped_store
= true;
7841 if (STMT_VINFO_VEC_STMT (stmt_info
))
7848 case condition_vec_info_type
:
7849 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7853 case call_vec_info_type
:
7854 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7855 stmt
= gsi_stmt (*gsi
);
7856 if (is_gimple_call (stmt
)
7857 && gimple_call_internal_p (stmt
)
7858 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7862 case call_simd_clone_vec_info_type
:
7863 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7864 stmt
= gsi_stmt (*gsi
);
7867 case reduc_vec_info_type
:
7868 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7873 if (!STMT_VINFO_LIVE_P (stmt_info
))
7875 if (dump_enabled_p ())
7876 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7877 "stmt not supported.\n");
7882 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
7883 This would break hybrid SLP vectorization. */
7885 gcc_assert (!vec_stmt
7886 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
7888 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7889 is being vectorized, but outside the immediately enclosing loop. */
7891 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7892 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7893 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7894 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7895 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7896 || STMT_VINFO_RELEVANT (stmt_info
) ==
7897 vect_used_in_outer_by_reduction
))
7899 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7900 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7901 imm_use_iterator imm_iter
;
7902 use_operand_p use_p
;
7906 if (dump_enabled_p ())
7907 dump_printf_loc (MSG_NOTE
, vect_location
,
7908 "Record the vdef for outer-loop vectorization.\n");
7910 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7911 (to be used when vectorizing outer-loop stmts that use the DEF of
7913 if (gimple_code (stmt
) == GIMPLE_PHI
)
7914 scalar_dest
= PHI_RESULT (stmt
);
7916 scalar_dest
= gimple_assign_lhs (stmt
);
7918 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7920 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7922 exit_phi
= USE_STMT (use_p
);
7923 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7928 /* Handle stmts whose DEF is used outside the loop-nest that is
7929 being vectorized. */
7930 if (STMT_VINFO_LIVE_P (stmt_info
)
7931 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7933 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7938 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7944 /* Remove a group of stores (for SLP or interleaving), free their
7948 vect_remove_stores (gimple
*first_stmt
)
7950 gimple
*next
= first_stmt
;
7952 gimple_stmt_iterator next_si
;
7956 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7958 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7959 if (is_pattern_stmt_p (stmt_info
))
7960 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7961 /* Free the attached stmt_vec_info and remove the stmt. */
7962 next_si
= gsi_for_stmt (next
);
7963 unlink_stmt_vdef (next
);
7964 gsi_remove (&next_si
, true);
7965 release_defs (next
);
7966 free_stmt_vec_info (next
);
7972 /* Function new_stmt_vec_info.
7974 Create and initialize a new stmt_vec_info struct for STMT. */
7977 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
7980 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7982 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7983 STMT_VINFO_STMT (res
) = stmt
;
7985 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7986 STMT_VINFO_LIVE_P (res
) = false;
7987 STMT_VINFO_VECTYPE (res
) = NULL
;
7988 STMT_VINFO_VEC_STMT (res
) = NULL
;
7989 STMT_VINFO_VECTORIZABLE (res
) = true;
7990 STMT_VINFO_IN_PATTERN_P (res
) = false;
7991 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7992 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7993 STMT_VINFO_DATA_REF (res
) = NULL
;
7995 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7996 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7997 STMT_VINFO_DR_INIT (res
) = NULL
;
7998 STMT_VINFO_DR_STEP (res
) = NULL
;
7999 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
8001 if (gimple_code (stmt
) == GIMPLE_PHI
8002 && is_loop_header_bb_p (gimple_bb (stmt
)))
8003 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8005 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8007 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8008 STMT_SLP_TYPE (res
) = loop_vect
;
8009 GROUP_FIRST_ELEMENT (res
) = NULL
;
8010 GROUP_NEXT_ELEMENT (res
) = NULL
;
8011 GROUP_SIZE (res
) = 0;
8012 GROUP_STORE_COUNT (res
) = 0;
8013 GROUP_GAP (res
) = 0;
8014 GROUP_SAME_DR_STMT (res
) = NULL
;
8020 /* Create a hash table for stmt_vec_info. */
8023 init_stmt_vec_info_vec (void)
8025 gcc_assert (!stmt_vec_info_vec
.exists ());
8026 stmt_vec_info_vec
.create (50);
8030 /* Free hash table for stmt_vec_info. */
8033 free_stmt_vec_info_vec (void)
8037 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8039 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8040 gcc_assert (stmt_vec_info_vec
.exists ());
8041 stmt_vec_info_vec
.release ();
8045 /* Free stmt vectorization related info. */
8048 free_stmt_vec_info (gimple
*stmt
)
8050 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8055 /* Check if this statement has a related "pattern stmt"
8056 (introduced by the vectorizer during the pattern recognition
8057 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8059 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8061 stmt_vec_info patt_info
8062 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8065 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8066 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8067 gimple_set_bb (patt_stmt
, NULL
);
8068 tree lhs
= gimple_get_lhs (patt_stmt
);
8069 if (TREE_CODE (lhs
) == SSA_NAME
)
8070 release_ssa_name (lhs
);
8073 gimple_stmt_iterator si
;
8074 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8076 gimple
*seq_stmt
= gsi_stmt (si
);
8077 gimple_set_bb (seq_stmt
, NULL
);
8078 lhs
= gimple_get_lhs (patt_stmt
);
8079 if (TREE_CODE (lhs
) == SSA_NAME
)
8080 release_ssa_name (lhs
);
8081 free_stmt_vec_info (seq_stmt
);
8084 free_stmt_vec_info (patt_stmt
);
8088 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8089 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8090 set_vinfo_for_stmt (stmt
, NULL
);
8095 /* Function get_vectype_for_scalar_type_and_size.
8097 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8101 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8103 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8104 machine_mode simd_mode
;
8105 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8112 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8113 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8116 /* For vector types of elements whose mode precision doesn't
8117 match their types precision we use a element type of mode
8118 precision. The vectorization routines will have to make sure
8119 they support the proper result truncation/extension.
8120 We also make sure to build vector types with INTEGER_TYPE
8121 component type only. */
8122 if (INTEGRAL_TYPE_P (scalar_type
)
8123 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8124 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8125 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8126 TYPE_UNSIGNED (scalar_type
));
8128 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8129 When the component mode passes the above test simply use a type
8130 corresponding to that mode. The theory is that any use that
8131 would cause problems with this will disable vectorization anyway. */
8132 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8133 && !INTEGRAL_TYPE_P (scalar_type
))
8134 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8136 /* We can't build a vector type of elements with alignment bigger than
8138 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8139 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8140 TYPE_UNSIGNED (scalar_type
));
8142 /* If we felt back to using the mode fail if there was
8143 no scalar type for it. */
8144 if (scalar_type
== NULL_TREE
)
8147 /* If no size was supplied use the mode the target prefers. Otherwise
8148 lookup a vector mode of the specified size. */
8150 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8152 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
8153 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8157 vectype
= build_vector_type (scalar_type
, nunits
);
8159 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8160 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
8166 unsigned int current_vector_size
;
8168 /* Function get_vectype_for_scalar_type.
8170 Returns the vector type corresponding to SCALAR_TYPE as supported
8174 get_vectype_for_scalar_type (tree scalar_type
)
8177 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8178 current_vector_size
);
8180 && current_vector_size
== 0)
8181 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8185 /* Function get_same_sized_vectype
8187 Returns a vector type corresponding to SCALAR_TYPE of size
8188 VECTOR_TYPE if supported by the target. */
8191 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8193 return get_vectype_for_scalar_type_and_size
8194 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8197 /* Function vect_is_simple_use.
8200 LOOP_VINFO - the vect info of the loop that is being vectorized.
8201 BB_VINFO - the vect info of the basic block that is being vectorized.
8202 OPERAND - operand of STMT in the loop or bb.
8203 DEF - the defining stmt in case OPERAND is an SSA_NAME.
8205 Returns whether a stmt with OPERAND can be vectorized.
8206 For loops, supportable operands are constants, loop invariants, and operands
8207 that are defined by the current iteration of the loop. Unsupportable
8208 operands are those that are defined by a previous iteration of the loop (as
8209 is the case in reduction/induction computations).
8210 For basic blocks, supportable operands are constants and bb invariants.
8211 For now, operands defined outside the basic block are not supported. */
8214 vect_is_simple_use (tree operand
, gimple
*stmt
, vec_info
*vinfo
,
8215 gimple
**def_stmt
, tree
*def
, enum vect_def_type
*dt
)
8219 *dt
= vect_unknown_def_type
;
8221 if (dump_enabled_p ())
8223 dump_printf_loc (MSG_NOTE
, vect_location
,
8224 "vect_is_simple_use: operand ");
8225 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8226 dump_printf (MSG_NOTE
, "\n");
8229 if (CONSTANT_CLASS_P (operand
))
8231 *dt
= vect_constant_def
;
8235 if (is_gimple_min_invariant (operand
))
8238 *dt
= vect_external_def
;
8242 if (TREE_CODE (operand
) != SSA_NAME
)
8244 if (dump_enabled_p ())
8245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8250 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8253 *dt
= vect_external_def
;
8257 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8258 if (dump_enabled_p ())
8260 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8261 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8264 basic_block bb
= gimple_bb (*def_stmt
);
8265 if ((is_a
<loop_vec_info
> (vinfo
)
8266 && !flow_bb_inside_loop_p (as_a
<loop_vec_info
> (vinfo
)->loop
, bb
))
8267 || (is_a
<bb_vec_info
> (vinfo
)
8268 && (bb
!= as_a
<bb_vec_info
> (vinfo
)->bb
8269 || gimple_code (*def_stmt
) == GIMPLE_PHI
)))
8270 *dt
= vect_external_def
;
8273 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8274 if (is_a
<bb_vec_info
> (vinfo
) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo
))
8275 *dt
= vect_external_def
;
8277 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8280 if (dump_enabled_p ())
8282 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8285 case vect_uninitialized_def
:
8286 dump_printf (MSG_NOTE
, "uninitialized\n");
8288 case vect_constant_def
:
8289 dump_printf (MSG_NOTE
, "constant\n");
8291 case vect_external_def
:
8292 dump_printf (MSG_NOTE
, "external\n");
8294 case vect_internal_def
:
8295 dump_printf (MSG_NOTE
, "internal\n");
8297 case vect_induction_def
:
8298 dump_printf (MSG_NOTE
, "induction\n");
8300 case vect_reduction_def
:
8301 dump_printf (MSG_NOTE
, "reduction\n");
8303 case vect_double_reduction_def
:
8304 dump_printf (MSG_NOTE
, "double reduction\n");
8306 case vect_nested_cycle
:
8307 dump_printf (MSG_NOTE
, "nested cycle\n");
8309 case vect_unknown_def_type
:
8310 dump_printf (MSG_NOTE
, "unknown\n");
8315 if (*dt
== vect_unknown_def_type
8317 && *dt
== vect_double_reduction_def
8318 && gimple_code (stmt
) != GIMPLE_PHI
))
8320 if (dump_enabled_p ())
8321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8322 "Unsupported pattern.\n");
8326 switch (gimple_code (*def_stmt
))
8329 *def
= gimple_phi_result (*def_stmt
);
8333 *def
= gimple_assign_lhs (*def_stmt
);
8337 *def
= gimple_call_lhs (*def_stmt
);
8342 if (dump_enabled_p ())
8343 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8344 "unsupported defining stmt:\n");
8351 /* Function vect_is_simple_use_1.
8353 Same as vect_is_simple_use_1 but also determines the vector operand
8354 type of OPERAND and stores it to *VECTYPE. If the definition of
8355 OPERAND is vect_uninitialized_def, vect_constant_def or
8356 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8357 is responsible to compute the best suited vector type for the
8361 vect_is_simple_use_1 (tree operand
, gimple
*stmt
, vec_info
*vinfo
,
8363 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
8365 if (!vect_is_simple_use (operand
, stmt
, vinfo
, def_stmt
, def
, dt
))
8368 /* Now get a vector type if the def is internal, otherwise supply
8369 NULL_TREE and leave it up to the caller to figure out a proper
8370 type for the use stmt. */
8371 if (*dt
== vect_internal_def
8372 || *dt
== vect_induction_def
8373 || *dt
== vect_reduction_def
8374 || *dt
== vect_double_reduction_def
8375 || *dt
== vect_nested_cycle
)
8377 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8379 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8380 && !STMT_VINFO_RELEVANT (stmt_info
)
8381 && !STMT_VINFO_LIVE_P (stmt_info
))
8382 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8384 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8385 gcc_assert (*vectype
!= NULL_TREE
);
8387 else if (*dt
== vect_uninitialized_def
8388 || *dt
== vect_constant_def
8389 || *dt
== vect_external_def
)
8390 *vectype
= NULL_TREE
;
8398 /* Function supportable_widening_operation
8400 Check whether an operation represented by the code CODE is a
8401 widening operation that is supported by the target platform in
8402 vector form (i.e., when operating on arguments of type VECTYPE_IN
8403 producing a result of type VECTYPE_OUT).
8405 Widening operations we currently support are NOP (CONVERT), FLOAT
8406 and WIDEN_MULT. This function checks if these operations are supported
8407 by the target platform either directly (via vector tree-codes), or via
8411 - CODE1 and CODE2 are codes of vector operations to be used when
8412 vectorizing the operation, if available.
8413 - MULTI_STEP_CVT determines the number of required intermediate steps in
8414 case of multi-step conversion (like char->short->int - in that case
8415 MULTI_STEP_CVT will be 1).
8416 - INTERM_TYPES contains the intermediate type required to perform the
8417 widening operation (short in the above example). */
8420 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
8421 tree vectype_out
, tree vectype_in
,
8422 enum tree_code
*code1
, enum tree_code
*code2
,
8423 int *multi_step_cvt
,
8424 vec
<tree
> *interm_types
)
8426 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8427 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8428 struct loop
*vect_loop
= NULL
;
8429 machine_mode vec_mode
;
8430 enum insn_code icode1
, icode2
;
8431 optab optab1
, optab2
;
8432 tree vectype
= vectype_in
;
8433 tree wide_vectype
= vectype_out
;
8434 enum tree_code c1
, c2
;
8436 tree prev_type
, intermediate_type
;
8437 machine_mode intermediate_mode
, prev_mode
;
8438 optab optab3
, optab4
;
8440 *multi_step_cvt
= 0;
8442 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8446 case WIDEN_MULT_EXPR
:
8447 /* The result of a vectorized widening operation usually requires
8448 two vectors (because the widened results do not fit into one vector).
8449 The generated vector results would normally be expected to be
8450 generated in the same order as in the original scalar computation,
8451 i.e. if 8 results are generated in each vector iteration, they are
8452 to be organized as follows:
8453 vect1: [res1,res2,res3,res4],
8454 vect2: [res5,res6,res7,res8].
8456 However, in the special case that the result of the widening
8457 operation is used in a reduction computation only, the order doesn't
8458 matter (because when vectorizing a reduction we change the order of
8459 the computation). Some targets can take advantage of this and
8460 generate more efficient code. For example, targets like Altivec,
8461 that support widen_mult using a sequence of {mult_even,mult_odd}
8462 generate the following vectors:
8463 vect1: [res1,res3,res5,res7],
8464 vect2: [res2,res4,res6,res8].
8466 When vectorizing outer-loops, we execute the inner-loop sequentially
8467 (each vectorized inner-loop iteration contributes to VF outer-loop
8468 iterations in parallel). We therefore don't allow to change the
8469 order of the computation in the inner-loop during outer-loop
8471 /* TODO: Another case in which order doesn't *really* matter is when we
8472 widen and then contract again, e.g. (short)((int)x * y >> 8).
8473 Normally, pack_trunc performs an even/odd permute, whereas the
8474 repack from an even/odd expansion would be an interleave, which
8475 would be significantly simpler for e.g. AVX2. */
8476 /* In any case, in order to avoid duplicating the code below, recurse
8477 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8478 are properly set up for the caller. If we fail, we'll continue with
8479 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8481 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8482 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8483 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8484 stmt
, vectype_out
, vectype_in
,
8485 code1
, code2
, multi_step_cvt
,
8488 /* Elements in a vector with vect_used_by_reduction property cannot
8489 be reordered if the use chain with this property does not have the
8490 same operation. One such an example is s += a * b, where elements
8491 in a and b cannot be reordered. Here we check if the vector defined
8492 by STMT is only directly used in the reduction statement. */
8493 tree lhs
= gimple_assign_lhs (stmt
);
8494 use_operand_p dummy
;
8496 stmt_vec_info use_stmt_info
= NULL
;
8497 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8498 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8499 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8502 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8503 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8506 case VEC_WIDEN_MULT_EVEN_EXPR
:
8507 /* Support the recursion induced just above. */
8508 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8509 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8512 case WIDEN_LSHIFT_EXPR
:
8513 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8514 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8518 c1
= VEC_UNPACK_LO_EXPR
;
8519 c2
= VEC_UNPACK_HI_EXPR
;
8523 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8524 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8527 case FIX_TRUNC_EXPR
:
8528 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8529 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8530 computing the operation. */
8537 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8540 if (code
== FIX_TRUNC_EXPR
)
8542 /* The signedness is determined from output operand. */
8543 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8544 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8548 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8549 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8552 if (!optab1
|| !optab2
)
8555 vec_mode
= TYPE_MODE (vectype
);
8556 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8557 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8563 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8564 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8567 /* Check if it's a multi-step conversion that can be done using intermediate
8570 prev_type
= vectype
;
8571 prev_mode
= vec_mode
;
8573 if (!CONVERT_EXPR_CODE_P (code
))
8576 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8577 intermediate steps in promotion sequence. We try
8578 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8580 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8581 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8583 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8585 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8586 TYPE_UNSIGNED (prev_type
));
8587 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8588 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8590 if (!optab3
|| !optab4
8591 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8592 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8593 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8594 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8595 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8596 == CODE_FOR_nothing
)
8597 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
8598 == CODE_FOR_nothing
))
8601 interm_types
->quick_push (intermediate_type
);
8602 (*multi_step_cvt
)++;
8604 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8605 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8608 prev_type
= intermediate_type
;
8609 prev_mode
= intermediate_mode
;
8612 interm_types
->release ();
8617 /* Function supportable_narrowing_operation
8619 Check whether an operation represented by the code CODE is a
8620 narrowing operation that is supported by the target platform in
8621 vector form (i.e., when operating on arguments of type VECTYPE_IN
8622 and producing a result of type VECTYPE_OUT).
8624 Narrowing operations we currently support are NOP (CONVERT) and
8625 FIX_TRUNC. This function checks if these operations are supported by
8626 the target platform directly via vector tree-codes.
8629 - CODE1 is the code of a vector operation to be used when
8630 vectorizing the operation, if available.
8631 - MULTI_STEP_CVT determines the number of required intermediate steps in
8632 case of multi-step conversion (like int->short->char - in that case
8633 MULTI_STEP_CVT will be 1).
8634 - INTERM_TYPES contains the intermediate type required to perform the
8635 narrowing operation (short in the above example). */
8638 supportable_narrowing_operation (enum tree_code code
,
8639 tree vectype_out
, tree vectype_in
,
8640 enum tree_code
*code1
, int *multi_step_cvt
,
8641 vec
<tree
> *interm_types
)
8643 machine_mode vec_mode
;
8644 enum insn_code icode1
;
8645 optab optab1
, interm_optab
;
8646 tree vectype
= vectype_in
;
8647 tree narrow_vectype
= vectype_out
;
8649 tree intermediate_type
;
8650 machine_mode intermediate_mode
, prev_mode
;
8654 *multi_step_cvt
= 0;
8658 c1
= VEC_PACK_TRUNC_EXPR
;
8661 case FIX_TRUNC_EXPR
:
8662 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8666 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8667 tree code and optabs used for computing the operation. */
8674 if (code
== FIX_TRUNC_EXPR
)
8675 /* The signedness is determined from output operand. */
8676 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8678 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8683 vec_mode
= TYPE_MODE (vectype
);
8684 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8689 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8692 /* Check if it's a multi-step conversion that can be done using intermediate
8694 prev_mode
= vec_mode
;
8695 if (code
== FIX_TRUNC_EXPR
)
8696 uns
= TYPE_UNSIGNED (vectype_out
);
8698 uns
= TYPE_UNSIGNED (vectype
);
8700 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8701 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8702 costly than signed. */
8703 if (code
== FIX_TRUNC_EXPR
&& uns
)
8705 enum insn_code icode2
;
8708 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8710 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8711 if (interm_optab
!= unknown_optab
8712 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8713 && insn_data
[icode1
].operand
[0].mode
8714 == insn_data
[icode2
].operand
[0].mode
)
8717 optab1
= interm_optab
;
8722 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8723 intermediate steps in promotion sequence. We try
8724 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8725 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8726 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8728 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8730 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8732 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8735 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8736 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8737 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8738 == CODE_FOR_nothing
))
8741 interm_types
->quick_push (intermediate_type
);
8742 (*multi_step_cvt
)++;
8744 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8747 prev_mode
= intermediate_mode
;
8748 optab1
= interm_optab
;
8751 interm_types
->release ();