1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
58 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
60 return STMT_VINFO_VECTYPE (stmt_info
);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
66 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
68 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
69 basic_block bb
= gimple_bb (stmt
);
70 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
76 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
78 return (bb
->loop_father
== loop
->inner
);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
86 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
87 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
88 int misalign
, enum vect_cost_model_location where
)
92 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
93 stmt_info_for_cost si
= { count
, kind
,
94 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
96 body_cost_vec
->safe_push (si
);
98 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
101 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
102 count
, kind
, stmt_info
, misalign
, where
);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
184 enum vect_relevant relevant
, bool live_p
)
186 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
187 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
188 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
189 gimple
*pattern_stmt
;
191 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE
, vect_location
,
194 "mark relevant %d, live %d: ", relevant
, live_p
);
195 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
198 /* If this stmt is an original stmt in a pattern, we might need to mark its
199 related pattern stmt instead of the original stmt. However, such stmts
200 may have their own uses that are not in any pattern, in such cases the
201 stmt itself should be marked. */
202 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
204 /* This is the last stmt in a sequence that was detected as a
205 pattern that can potentially be vectorized. Don't mark the stmt
206 as relevant/live because it's not going to be vectorized.
207 Instead mark the pattern-stmt that replaces it. */
209 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
211 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE
, vect_location
,
213 "last stmt in pattern. don't mark"
214 " relevant/live.\n");
215 stmt_info
= vinfo_for_stmt (pattern_stmt
);
216 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
217 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
218 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
222 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
223 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
224 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
226 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
227 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
229 if (dump_enabled_p ())
230 dump_printf_loc (MSG_NOTE
, vect_location
,
231 "already marked relevant/live.\n");
235 worklist
->safe_push (stmt
);
239 /* Function is_simple_and_all_uses_invariant
241 Return true if STMT is simple and all uses of it are invariant. */
244 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
250 if (!is_gimple_assign (stmt
))
253 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
255 enum vect_def_type dt
= vect_uninitialized_def
;
257 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
261 "use not simple.\n");
265 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
271 /* Function vect_stmt_relevant_p.
273 Return true if STMT in loop that is represented by LOOP_VINFO is
274 "relevant for vectorization".
276 A stmt is considered "relevant for vectorization" if:
277 - it has uses outside the loop.
278 - it has vdefs (it alters memory).
279 - control stmts in the loop (except for the exit condition).
281 CHECKME: what other side effects would the vectorizer allow? */
284 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
285 enum vect_relevant
*relevant
, bool *live_p
)
287 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
289 imm_use_iterator imm_iter
;
293 *relevant
= vect_unused_in_scope
;
296 /* cond stmt other than loop exit cond. */
297 if (is_ctrl_stmt (stmt
)
298 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
299 != loop_exit_ctrl_vec_info_type
)
300 *relevant
= vect_used_in_scope
;
302 /* changing memory. */
303 if (gimple_code (stmt
) != GIMPLE_PHI
)
304 if (gimple_vdef (stmt
)
305 && !gimple_clobber_p (stmt
))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_NOTE
, vect_location
,
309 "vec_stmt_relevant_p: stmt has vdefs.\n");
310 *relevant
= vect_used_in_scope
;
313 /* uses outside the loop. */
314 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
316 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
318 basic_block bb
= gimple_bb (USE_STMT (use_p
));
319 if (!flow_bb_inside_loop_p (loop
, bb
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: used out of loop.\n");
325 if (is_gimple_debug (USE_STMT (use_p
)))
328 /* We expect all such uses to be in the loop exit phis
329 (because of loop closed form) */
330 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
331 gcc_assert (bb
== single_exit (loop
)->dest
);
338 if (*live_p
&& *relevant
== vect_unused_in_scope
339 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE
, vect_location
,
343 "vec_stmt_relevant_p: stmt live but not relevant.\n");
344 *relevant
= vect_used_only_live
;
347 return (*live_p
|| *relevant
);
351 /* Function exist_non_indexing_operands_for_use_p
353 USE is one of the uses attached to STMT. Check if USE is
354 used in STMT for anything other than indexing an array. */
357 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
360 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
362 /* USE corresponds to some operand in STMT. If there is no data
363 reference in STMT, then any operand that corresponds to USE
364 is not indexing an array. */
365 if (!STMT_VINFO_DATA_REF (stmt_info
))
368 /* STMT has a data_ref. FORNOW this means that its of one of
372 (This should have been verified in analyze_data_refs).
374 'var' in the second case corresponds to a def, not a use,
375 so USE cannot correspond to any operands that are not used
378 Therefore, all we need to check is if STMT falls into the
379 first case, and whether var corresponds to USE. */
381 if (!gimple_assign_copy_p (stmt
))
383 if (is_gimple_call (stmt
)
384 && gimple_call_internal_p (stmt
))
385 switch (gimple_call_internal_fn (stmt
))
388 operand
= gimple_call_arg (stmt
, 3);
393 operand
= gimple_call_arg (stmt
, 2);
403 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
405 operand
= gimple_assign_rhs1 (stmt
);
406 if (TREE_CODE (operand
) != SSA_NAME
)
417 Function process_use.
420 - a USE in STMT in a loop represented by LOOP_VINFO
421 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
422 that defined USE. This is done by calling mark_relevant and passing it
423 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
424 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
428 Generally, LIVE_P and RELEVANT are used to define the liveness and
429 relevance info of the DEF_STMT of this USE:
430 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
431 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
433 - case 1: If USE is used only for address computations (e.g. array indexing),
434 which does not need to be directly vectorized, then the liveness/relevance
435 of the respective DEF_STMT is left unchanged.
436 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
437 skip DEF_STMT cause it had already been processed.
438 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
439 be modified accordingly.
441 Return true if everything is as expected. Return false otherwise. */
444 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
445 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
448 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
449 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
450 stmt_vec_info dstmt_vinfo
;
451 basic_block bb
, def_bb
;
453 enum vect_def_type dt
;
455 /* case 1: we are only interested in uses that need to be vectorized. Uses
456 that are used for address computation are not considered relevant. */
457 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
460 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
462 if (dump_enabled_p ())
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
464 "not vectorized: unsupported use in stmt.\n");
468 if (!def_stmt
|| gimple_nop_p (def_stmt
))
471 def_bb
= gimple_bb (def_stmt
);
472 if (!flow_bb_inside_loop_p (loop
, def_bb
))
474 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
479 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
480 DEF_STMT must have already been processed, because this should be the
481 only way that STMT, which is a reduction-phi, was put in the worklist,
482 as there should be no other uses for DEF_STMT in the loop. So we just
483 check that everything is as expected, and we are done. */
484 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
485 bb
= gimple_bb (stmt
);
486 if (gimple_code (stmt
) == GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
488 && gimple_code (def_stmt
) != GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
490 && bb
->loop_father
== def_bb
->loop_father
)
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE
, vect_location
,
494 "reduc-stmt defining reduc-phi in the same nest.\n");
495 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
496 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
497 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
498 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
499 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
503 /* case 3a: outer-loop stmt defining an inner-loop stmt:
504 outer-loop-header-bb:
510 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE
, vect_location
,
514 "outer-loop def-stmt defining inner-loop stmt.\n");
518 case vect_unused_in_scope
:
519 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
520 vect_used_in_scope
: vect_unused_in_scope
;
523 case vect_used_in_outer_by_reduction
:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
525 relevant
= vect_used_by_reduction
;
528 case vect_used_in_outer
:
529 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
530 relevant
= vect_used_in_scope
;
533 case vect_used_in_scope
:
541 /* case 3b: inner-loop stmt defining an outer-loop stmt:
542 outer-loop-header-bb:
546 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
548 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE
, vect_location
,
552 "inner-loop def-stmt defining outer-loop stmt.\n");
556 case vect_unused_in_scope
:
557 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
558 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
559 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
562 case vect_used_by_reduction
:
563 case vect_used_only_live
:
564 relevant
= vect_used_in_outer_by_reduction
;
567 case vect_used_in_scope
:
568 relevant
= vect_used_in_outer
;
576 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
581 /* Function vect_mark_stmts_to_be_vectorized.
583 Not all stmts in the loop need to be vectorized. For example:
592 Stmt 1 and 3 do not need to be vectorized, because loop control and
593 addressing of vectorized data-refs are handled differently.
595 This pass detects such stmts. */
598 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
600 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
601 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
602 unsigned int nbbs
= loop
->num_nodes
;
603 gimple_stmt_iterator si
;
606 stmt_vec_info stmt_vinfo
;
610 enum vect_relevant relevant
;
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE
, vect_location
,
614 "=== vect_mark_stmts_to_be_vectorized ===\n");
616 auto_vec
<gimple
*, 64> worklist
;
618 /* 1. Init worklist. */
619 for (i
= 0; i
< nbbs
; i
++)
622 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
625 if (dump_enabled_p ())
627 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
628 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
631 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
632 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
634 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
636 stmt
= gsi_stmt (si
);
637 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
640 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
643 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
644 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
648 /* 2. Process_worklist */
649 while (worklist
.length () > 0)
654 stmt
= worklist
.pop ();
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
658 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
661 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
662 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 stmt_vinfo
= vinfo_for_stmt (stmt
);
665 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
680 case vect_reduction_def
:
681 gcc_assert (relevant
!= vect_unused_in_scope
);
682 if (relevant
!= vect_unused_in_scope
683 && relevant
!= vect_used_in_scope
684 && relevant
!= vect_used_by_reduction
685 && relevant
!= vect_used_only_live
)
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
689 "unsupported use of reduction.\n");
694 case vect_nested_cycle
:
695 if (relevant
!= vect_unused_in_scope
696 && relevant
!= vect_used_in_outer_by_reduction
697 && relevant
!= vect_used_in_outer
)
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
701 "unsupported use of nested cycle.\n");
707 case vect_double_reduction_def
:
708 if (relevant
!= vect_unused_in_scope
709 && relevant
!= vect_used_by_reduction
710 && relevant
!= vect_used_only_live
)
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
714 "unsupported use of double reduction.\n");
724 if (is_pattern_stmt_p (stmt_vinfo
))
726 /* Pattern statements are not inserted into the code, so
727 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
728 have to scan the RHS or function arguments instead. */
729 if (is_gimple_assign (stmt
))
731 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
732 tree op
= gimple_assign_rhs1 (stmt
);
735 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
737 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
738 relevant
, &worklist
, false)
739 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
740 relevant
, &worklist
, false))
744 for (; i
< gimple_num_ops (stmt
); i
++)
746 op
= gimple_op (stmt
, i
);
747 if (TREE_CODE (op
) == SSA_NAME
748 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
753 else if (is_gimple_call (stmt
))
755 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
757 tree arg
= gimple_call_arg (stmt
, i
);
758 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
765 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
767 tree op
= USE_FROM_PTR (use_p
);
768 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
773 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
776 tree decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
778 if (!process_use (stmt
, off
, loop_vinfo
, relevant
, &worklist
, true))
781 } /* while worklist */
787 /* Function vect_model_simple_cost.
789 Models cost for simple operations, i.e. those that only emit ncopies of a
790 single op. Right now, this does not account for multiple insns that could
791 be generated for the single vector op. We will handle that shortly. */
794 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
795 enum vect_def_type
*dt
,
796 stmt_vector_for_cost
*prologue_cost_vec
,
797 stmt_vector_for_cost
*body_cost_vec
)
800 int inside_cost
= 0, prologue_cost
= 0;
802 /* The SLP costs were already calculated during SLP tree build. */
803 if (PURE_SLP_STMT (stmt_info
))
806 /* FORNOW: Assuming maximum 2 args per stmts. */
807 for (i
= 0; i
< 2; i
++)
808 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
809 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
810 stmt_info
, 0, vect_prologue
);
812 /* Pass the inside-of-loop statements to the target-specific cost model. */
813 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
814 stmt_info
, 0, vect_body
);
816 if (dump_enabled_p ())
817 dump_printf_loc (MSG_NOTE
, vect_location
,
818 "vect_model_simple_cost: inside_cost = %d, "
819 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
823 /* Model cost for type demotion and promotion operations. PWR is normally
824 zero for single-step promotions and demotions. It will be one if
825 two-step promotion/demotion is required, and so on. Each additional
826 step doubles the number of instructions required. */
829 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
830 enum vect_def_type
*dt
, int pwr
)
833 int inside_cost
= 0, prologue_cost
= 0;
834 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
835 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
836 void *target_cost_data
;
838 /* The SLP costs were already calculated during SLP tree build. */
839 if (PURE_SLP_STMT (stmt_info
))
843 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
845 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
847 for (i
= 0; i
< pwr
+ 1; i
++)
849 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
851 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
852 vec_promote_demote
, stmt_info
, 0,
856 /* FORNOW: Assuming maximum 2 args per stmts. */
857 for (i
= 0; i
< 2; i
++)
858 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
859 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
860 stmt_info
, 0, vect_prologue
);
862 if (dump_enabled_p ())
863 dump_printf_loc (MSG_NOTE
, vect_location
,
864 "vect_model_promotion_demotion_cost: inside_cost = %d, "
865 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
868 /* Function vect_cost_group_size
870 For grouped load or store, return the group_size only if it is the first
871 load or store of a group, else return 1. This ensures that group size is
872 only returned once per group. */
875 vect_cost_group_size (stmt_vec_info stmt_info
)
877 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
879 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
880 return GROUP_SIZE (stmt_info
);
886 /* Function vect_model_store_cost
888 Models cost for stores. In the case of grouped accesses, one access
889 has the overhead of the grouped access attributed to it. */
892 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
893 bool store_lanes_p
, enum vect_def_type dt
,
895 stmt_vector_for_cost
*prologue_cost_vec
,
896 stmt_vector_for_cost
*body_cost_vec
)
899 unsigned int inside_cost
= 0, prologue_cost
= 0;
900 struct data_reference
*first_dr
;
903 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
904 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
905 stmt_info
, 0, vect_prologue
);
907 /* Grouped access? */
908 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
912 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
917 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
918 group_size
= vect_cost_group_size (stmt_info
);
921 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
923 /* Not a grouped access. */
927 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
930 /* We assume that the cost of a single store-lanes instruction is
931 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
932 access is instead being provided by a permute-and-store operation,
933 include the cost of the permutes. */
934 if (!store_lanes_p
&& group_size
> 1
935 && !STMT_VINFO_STRIDED_P (stmt_info
))
937 /* Uses a high and low interleave or shuffle operations for each
939 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
940 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
941 stmt_info
, 0, vect_body
);
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_NOTE
, vect_location
,
945 "vect_model_store_cost: strided group_size = %d .\n",
949 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
950 /* Costs of the stores. */
951 if (STMT_VINFO_STRIDED_P (stmt_info
)
952 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
954 /* N scalar stores plus extracting the elements. */
955 inside_cost
+= record_stmt_cost (body_cost_vec
,
956 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
957 scalar_store
, stmt_info
, 0, vect_body
);
960 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
962 if (STMT_VINFO_STRIDED_P (stmt_info
))
963 inside_cost
+= record_stmt_cost (body_cost_vec
,
964 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
965 vec_to_scalar
, stmt_info
, 0, vect_body
);
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_NOTE
, vect_location
,
969 "vect_model_store_cost: inside_cost = %d, "
970 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
974 /* Calculate cost of DR's memory access. */
976 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
977 unsigned int *inside_cost
,
978 stmt_vector_for_cost
*body_cost_vec
)
980 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
981 gimple
*stmt
= DR_STMT (dr
);
982 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
984 switch (alignment_support_scheme
)
988 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
989 vector_store
, stmt_info
, 0,
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_NOTE
, vect_location
,
994 "vect_model_store_cost: aligned.\n");
998 case dr_unaligned_supported
:
1000 /* Here, we assign an additional cost for the unaligned store. */
1001 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1002 unaligned_store
, stmt_info
,
1003 DR_MISALIGNMENT (dr
), vect_body
);
1004 if (dump_enabled_p ())
1005 dump_printf_loc (MSG_NOTE
, vect_location
,
1006 "vect_model_store_cost: unaligned supported by "
1011 case dr_unaligned_unsupported
:
1013 *inside_cost
= VECT_MAX_COST
;
1015 if (dump_enabled_p ())
1016 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1017 "vect_model_store_cost: unsupported access.\n");
1027 /* Function vect_model_load_cost
1029 Models cost for loads. In the case of grouped accesses, the last access
1030 has the overhead of the grouped access attributed to it. Since unaligned
1031 accesses are supported for loads, we also account for the costs of the
1032 access scheme chosen. */
1035 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1036 bool load_lanes_p
, slp_tree slp_node
,
1037 stmt_vector_for_cost
*prologue_cost_vec
,
1038 stmt_vector_for_cost
*body_cost_vec
)
1042 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1043 unsigned int inside_cost
= 0, prologue_cost
= 0;
1045 /* Grouped accesses? */
1046 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1047 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1049 group_size
= vect_cost_group_size (stmt_info
);
1050 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1052 /* Not a grouped access. */
1059 /* We assume that the cost of a single load-lanes instruction is
1060 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1061 access is instead being provided by a load-and-permute operation,
1062 include the cost of the permutes. */
1063 if (!load_lanes_p
&& group_size
> 1
1064 && !STMT_VINFO_STRIDED_P (stmt_info
))
1066 /* Uses an even and odd extract operations or shuffle operations
1067 for each needed permute. */
1068 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1069 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1070 stmt_info
, 0, vect_body
);
1072 if (dump_enabled_p ())
1073 dump_printf_loc (MSG_NOTE
, vect_location
,
1074 "vect_model_load_cost: strided group_size = %d .\n",
1078 /* The loads themselves. */
1079 if (STMT_VINFO_STRIDED_P (stmt_info
)
1080 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1082 /* N scalar loads plus gathering them into a vector. */
1083 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1084 inside_cost
+= record_stmt_cost (body_cost_vec
,
1085 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1086 scalar_load
, stmt_info
, 0, vect_body
);
1089 vect_get_load_cost (first_dr
, ncopies
,
1090 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1091 || group_size
> 1 || slp_node
),
1092 &inside_cost
, &prologue_cost
,
1093 prologue_cost_vec
, body_cost_vec
, true);
1094 if (STMT_VINFO_STRIDED_P (stmt_info
))
1095 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1096 stmt_info
, 0, vect_body
);
1098 if (dump_enabled_p ())
1099 dump_printf_loc (MSG_NOTE
, vect_location
,
1100 "vect_model_load_cost: inside_cost = %d, "
1101 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1105 /* Calculate cost of DR's memory access. */
1107 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1108 bool add_realign_cost
, unsigned int *inside_cost
,
1109 unsigned int *prologue_cost
,
1110 stmt_vector_for_cost
*prologue_cost_vec
,
1111 stmt_vector_for_cost
*body_cost_vec
,
1112 bool record_prologue_costs
)
1114 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1115 gimple
*stmt
= DR_STMT (dr
);
1116 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1118 switch (alignment_support_scheme
)
1122 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1123 stmt_info
, 0, vect_body
);
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_NOTE
, vect_location
,
1127 "vect_model_load_cost: aligned.\n");
1131 case dr_unaligned_supported
:
1133 /* Here, we assign an additional cost for the unaligned load. */
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1135 unaligned_load
, stmt_info
,
1136 DR_MISALIGNMENT (dr
), vect_body
);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE
, vect_location
,
1140 "vect_model_load_cost: unaligned supported by "
1145 case dr_explicit_realign
:
1147 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1148 vector_load
, stmt_info
, 0, vect_body
);
1149 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1150 vec_perm
, stmt_info
, 0, vect_body
);
1152 /* FIXME: If the misalignment remains fixed across the iterations of
1153 the containing loop, the following cost should be added to the
1155 if (targetm
.vectorize
.builtin_mask_for_load
)
1156 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1157 stmt_info
, 0, vect_body
);
1159 if (dump_enabled_p ())
1160 dump_printf_loc (MSG_NOTE
, vect_location
,
1161 "vect_model_load_cost: explicit realign\n");
1165 case dr_explicit_realign_optimized
:
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE
, vect_location
,
1169 "vect_model_load_cost: unaligned software "
1172 /* Unaligned software pipeline has a load of an address, an initial
1173 load, and possibly a mask operation to "prime" the loop. However,
1174 if this is an access in a group of loads, which provide grouped
1175 access, then the above cost should only be considered for one
1176 access in the group. Inside the loop, there is a load op
1177 and a realignment op. */
1179 if (add_realign_cost
&& record_prologue_costs
)
1181 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1182 vector_stmt
, stmt_info
,
1184 if (targetm
.vectorize
.builtin_mask_for_load
)
1185 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1186 vector_stmt
, stmt_info
,
1190 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1191 stmt_info
, 0, vect_body
);
1192 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1193 stmt_info
, 0, vect_body
);
1195 if (dump_enabled_p ())
1196 dump_printf_loc (MSG_NOTE
, vect_location
,
1197 "vect_model_load_cost: explicit realign optimized"
1203 case dr_unaligned_unsupported
:
1205 *inside_cost
= VECT_MAX_COST
;
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1209 "vect_model_load_cost: unsupported access.\n");
1218 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1219 the loop preheader for the vectorized stmt STMT. */
1222 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1225 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1228 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1229 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1233 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1237 if (nested_in_vect_loop_p (loop
, stmt
))
1240 pe
= loop_preheader_edge (loop
);
1241 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1242 gcc_assert (!new_bb
);
1246 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1248 gimple_stmt_iterator gsi_bb_start
;
1250 gcc_assert (bb_vinfo
);
1251 bb
= BB_VINFO_BB (bb_vinfo
);
1252 gsi_bb_start
= gsi_after_labels (bb
);
1253 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1257 if (dump_enabled_p ())
1259 dump_printf_loc (MSG_NOTE
, vect_location
,
1260 "created new init_stmt: ");
1261 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1265 /* Function vect_init_vector.
1267 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1268 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1269 vector type a vector with all elements equal to VAL is created first.
1270 Place the initialization at BSI if it is not NULL. Otherwise, place the
1271 initialization at the loop preheader.
1272 Return the DEF of INIT_STMT.
1273 It will be used in the vectorization of STMT. */
1276 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1281 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1282 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1284 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1285 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1287 /* Scalar boolean value should be transformed into
1288 all zeros or all ones value before building a vector. */
1289 if (VECTOR_BOOLEAN_TYPE_P (type
))
1291 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1292 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1294 if (CONSTANT_CLASS_P (val
))
1295 val
= integer_zerop (val
) ? false_val
: true_val
;
1298 new_temp
= make_ssa_name (TREE_TYPE (type
));
1299 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1300 val
, true_val
, false_val
);
1301 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1305 else if (CONSTANT_CLASS_P (val
))
1306 val
= fold_convert (TREE_TYPE (type
), val
);
1309 new_temp
= make_ssa_name (TREE_TYPE (type
));
1310 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1311 init_stmt
= gimple_build_assign (new_temp
,
1312 fold_build1 (VIEW_CONVERT_EXPR
,
1316 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1317 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1321 val
= build_vector_from_val (type
, val
);
1324 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1325 init_stmt
= gimple_build_assign (new_temp
, val
);
1326 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1330 /* Function vect_get_vec_def_for_operand_1.
1332 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1333 DT that will be used in the vectorized stmt. */
1336 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1340 stmt_vec_info def_stmt_info
= NULL
;
1344 /* operand is a constant or a loop invariant. */
1345 case vect_constant_def
:
1346 case vect_external_def
:
1347 /* Code should use vect_get_vec_def_for_operand. */
1350 /* operand is defined inside the loop. */
1351 case vect_internal_def
:
1353 /* Get the def from the vectorized stmt. */
1354 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1356 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1357 /* Get vectorized pattern statement. */
1359 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1360 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1361 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1362 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1363 gcc_assert (vec_stmt
);
1364 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1365 vec_oprnd
= PHI_RESULT (vec_stmt
);
1366 else if (is_gimple_call (vec_stmt
))
1367 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1369 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1373 /* operand is defined by a loop header phi - reduction */
1374 case vect_reduction_def
:
1375 case vect_double_reduction_def
:
1376 case vect_nested_cycle
:
1377 /* Code should use get_initial_def_for_reduction. */
1380 /* operand is defined by loop-header phi - induction. */
1381 case vect_induction_def
:
1383 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1385 /* Get the def from the vectorized stmt. */
1386 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1387 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1388 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1389 vec_oprnd
= PHI_RESULT (vec_stmt
);
1391 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1401 /* Function vect_get_vec_def_for_operand.
1403 OP is an operand in STMT. This function returns a (vector) def that will be
1404 used in the vectorized stmt for STMT.
1406 In the case that OP is an SSA_NAME which is defined in the loop, then
1407 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1409 In case OP is an invariant or constant, a new stmt that creates a vector def
1410 needs to be introduced. VECTYPE may be used to specify a required type for
1411 vector invariant. */
1414 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1417 enum vect_def_type dt
;
1419 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1420 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1422 if (dump_enabled_p ())
1424 dump_printf_loc (MSG_NOTE
, vect_location
,
1425 "vect_get_vec_def_for_operand: ");
1426 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1427 dump_printf (MSG_NOTE
, "\n");
1430 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1431 gcc_assert (is_simple_use
);
1432 if (def_stmt
&& dump_enabled_p ())
1434 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1435 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1438 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1440 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1444 vector_type
= vectype
;
1445 else if (TREE_CODE (TREE_TYPE (op
)) == BOOLEAN_TYPE
1446 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1447 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1449 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1451 gcc_assert (vector_type
);
1452 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1455 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1459 /* Function vect_get_vec_def_for_stmt_copy
1461 Return a vector-def for an operand. This function is used when the
1462 vectorized stmt to be created (by the caller to this function) is a "copy"
1463 created in case the vectorized result cannot fit in one vector, and several
1464 copies of the vector-stmt are required. In this case the vector-def is
1465 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1466 of the stmt that defines VEC_OPRND.
1467 DT is the type of the vector def VEC_OPRND.
1470 In case the vectorization factor (VF) is bigger than the number
1471 of elements that can fit in a vectype (nunits), we have to generate
1472 more than one vector stmt to vectorize the scalar stmt. This situation
1473 arises when there are multiple data-types operated upon in the loop; the
1474 smallest data-type determines the VF, and as a result, when vectorizing
1475 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1476 vector stmt (each computing a vector of 'nunits' results, and together
1477 computing 'VF' results in each iteration). This function is called when
1478 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1479 which VF=16 and nunits=4, so the number of copies required is 4):
1481 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1483 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1484 VS1.1: vx.1 = memref1 VS1.2
1485 VS1.2: vx.2 = memref2 VS1.3
1486 VS1.3: vx.3 = memref3
1488 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1489 VSnew.1: vz1 = vx.1 + ... VSnew.2
1490 VSnew.2: vz2 = vx.2 + ... VSnew.3
1491 VSnew.3: vz3 = vx.3 + ...
1493 The vectorization of S1 is explained in vectorizable_load.
1494 The vectorization of S2:
1495 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1496 the function 'vect_get_vec_def_for_operand' is called to
1497 get the relevant vector-def for each operand of S2. For operand x it
1498 returns the vector-def 'vx.0'.
1500 To create the remaining copies of the vector-stmt (VSnew.j), this
1501 function is called to get the relevant vector-def for each operand. It is
1502 obtained from the respective VS1.j stmt, which is recorded in the
1503 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1505 For example, to obtain the vector-def 'vx.1' in order to create the
1506 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1507 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1508 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1509 and return its def ('vx.1').
1510 Overall, to create the above sequence this function will be called 3 times:
1511 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1512 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1513 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1516 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1518 gimple
*vec_stmt_for_operand
;
1519 stmt_vec_info def_stmt_info
;
1521 /* Do nothing; can reuse same def. */
1522 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1525 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1526 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1527 gcc_assert (def_stmt_info
);
1528 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1529 gcc_assert (vec_stmt_for_operand
);
1530 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1531 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1533 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1538 /* Get vectorized definitions for the operands to create a copy of an original
1539 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1542 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1543 vec
<tree
> *vec_oprnds0
,
1544 vec
<tree
> *vec_oprnds1
)
1546 tree vec_oprnd
= vec_oprnds0
->pop ();
1548 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1549 vec_oprnds0
->quick_push (vec_oprnd
);
1551 if (vec_oprnds1
&& vec_oprnds1
->length ())
1553 vec_oprnd
= vec_oprnds1
->pop ();
1554 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1555 vec_oprnds1
->quick_push (vec_oprnd
);
1560 /* Get vectorized definitions for OP0 and OP1.
1561 REDUC_INDEX is the index of reduction operand in case of reduction,
1562 and -1 otherwise. */
1565 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1566 vec
<tree
> *vec_oprnds0
,
1567 vec
<tree
> *vec_oprnds1
,
1568 slp_tree slp_node
, int reduc_index
)
1572 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1573 auto_vec
<tree
> ops (nops
);
1574 auto_vec
<vec
<tree
> > vec_defs (nops
);
1576 ops
.quick_push (op0
);
1578 ops
.quick_push (op1
);
1580 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1582 *vec_oprnds0
= vec_defs
[0];
1584 *vec_oprnds1
= vec_defs
[1];
1590 vec_oprnds0
->create (1);
1591 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1592 vec_oprnds0
->quick_push (vec_oprnd
);
1596 vec_oprnds1
->create (1);
1597 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1598 vec_oprnds1
->quick_push (vec_oprnd
);
1604 /* Function vect_finish_stmt_generation.
1606 Insert a new stmt. */
1609 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1610 gimple_stmt_iterator
*gsi
)
1612 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1613 vec_info
*vinfo
= stmt_info
->vinfo
;
1615 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1617 if (!gsi_end_p (*gsi
)
1618 && gimple_has_mem_ops (vec_stmt
))
1620 gimple
*at_stmt
= gsi_stmt (*gsi
);
1621 tree vuse
= gimple_vuse (at_stmt
);
1622 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1624 tree vdef
= gimple_vdef (at_stmt
);
1625 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1626 /* If we have an SSA vuse and insert a store, update virtual
1627 SSA form to avoid triggering the renamer. Do so only
1628 if we can easily see all uses - which is what almost always
1629 happens with the way vectorized stmts are inserted. */
1630 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1631 && ((is_gimple_assign (vec_stmt
)
1632 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1633 || (is_gimple_call (vec_stmt
)
1634 && !(gimple_call_flags (vec_stmt
)
1635 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1637 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1638 gimple_set_vdef (vec_stmt
, new_vdef
);
1639 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1643 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1645 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1647 if (dump_enabled_p ())
1649 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1650 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1653 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1655 /* While EH edges will generally prevent vectorization, stmt might
1656 e.g. be in a must-not-throw region. Ensure newly created stmts
1657 that could throw are part of the same region. */
1658 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1659 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1660 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1663 /* We want to vectorize a call to combined function CFN with function
1664 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1665 as the types of all inputs. Check whether this is possible using
1666 an internal function, returning its code if so or IFN_LAST if not. */
1669 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1670 tree vectype_out
, tree vectype_in
)
1673 if (internal_fn_p (cfn
))
1674 ifn
= as_internal_fn (cfn
);
1676 ifn
= associated_internal_fn (fndecl
);
1677 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1679 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1680 if (info
.vectorizable
)
1682 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1683 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1684 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1685 OPTIMIZE_FOR_SPEED
))
1693 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1694 gimple_stmt_iterator
*);
1697 /* Function vectorizable_mask_load_store.
1699 Check if STMT performs a conditional load or store that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1705 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1706 gimple
**vec_stmt
, slp_tree slp_node
)
1708 tree vec_dest
= NULL
;
1709 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1710 stmt_vec_info prev_stmt_info
;
1711 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1712 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1713 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1714 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1715 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1716 tree rhs_vectype
= NULL_TREE
;
1721 tree dataref_ptr
= NULL_TREE
;
1723 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1727 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1728 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1729 int gather_scale
= 1;
1730 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1734 enum vect_def_type dt
;
1736 if (slp_node
!= NULL
)
1739 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1740 gcc_assert (ncopies
>= 1);
1742 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1743 mask
= gimple_call_arg (stmt
, 2);
1745 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
1748 /* FORNOW. This restriction should be relaxed. */
1749 if (nested_in_vect_loop
&& ncopies
> 1)
1751 if (dump_enabled_p ())
1752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1753 "multiple types in nested loop.");
1757 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1760 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
1764 if (!STMT_VINFO_DATA_REF (stmt_info
))
1767 elem_type
= TREE_TYPE (vectype
);
1769 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1772 if (STMT_VINFO_STRIDED_P (stmt_info
))
1775 if (TREE_CODE (mask
) != SSA_NAME
)
1778 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
1782 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
1784 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
1785 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
1790 tree rhs
= gimple_call_arg (stmt
, 3);
1791 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
1795 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1798 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
1799 &gather_off
, &gather_scale
);
1800 gcc_assert (gather_decl
);
1801 if (!vect_is_simple_use (gather_off
, loop_vinfo
, &def_stmt
, &gather_dt
,
1802 &gather_off_vectype
))
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1806 "gather index use not simple.");
1810 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1812 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1813 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1815 if (dump_enabled_p ())
1816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1817 "masked gather with integer mask not supported.");
1821 else if (tree_int_cst_compare (nested_in_vect_loop
1822 ? STMT_VINFO_DR_STEP (stmt_info
)
1823 : DR_STEP (dr
), size_zero_node
) <= 0)
1825 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1826 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
1827 TYPE_MODE (mask_vectype
),
1830 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
1833 if (!vec_stmt
) /* transformation not required. */
1835 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1837 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1840 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1846 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1848 tree vec_oprnd0
= NULL_TREE
, op
;
1849 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1850 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1851 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1852 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1853 tree mask_perm_mask
= NULL_TREE
;
1854 edge pe
= loop_preheader_edge (loop
);
1857 enum { NARROW
, NONE
, WIDEN
} modifier
;
1858 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1860 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1861 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1862 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1863 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1864 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1865 scaletype
= TREE_VALUE (arglist
);
1866 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1867 && types_compatible_p (srctype
, masktype
));
1869 if (nunits
== gather_off_nunits
)
1871 else if (nunits
== gather_off_nunits
/ 2)
1873 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1876 for (i
= 0; i
< gather_off_nunits
; ++i
)
1877 sel
[i
] = i
| nunits
;
1879 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1881 else if (nunits
== gather_off_nunits
* 2)
1883 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1886 for (i
= 0; i
< nunits
; ++i
)
1887 sel
[i
] = i
< gather_off_nunits
1888 ? i
: i
+ nunits
- gather_off_nunits
;
1890 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1892 for (i
= 0; i
< nunits
; ++i
)
1893 sel
[i
] = i
| gather_off_nunits
;
1894 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1899 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1901 ptr
= fold_convert (ptrtype
, gather_base
);
1902 if (!is_gimple_min_invariant (ptr
))
1904 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1905 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1906 gcc_assert (!new_bb
);
1909 scale
= build_int_cst (scaletype
, gather_scale
);
1911 prev_stmt_info
= NULL
;
1912 for (j
= 0; j
< ncopies
; ++j
)
1914 if (modifier
== WIDEN
&& (j
& 1))
1915 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1916 perm_mask
, stmt
, gsi
);
1919 = vect_get_vec_def_for_operand (gather_off
, stmt
);
1922 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1924 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1927 == TYPE_VECTOR_SUBPARTS (idxtype
));
1928 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
1929 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1931 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1932 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1936 if (mask_perm_mask
&& (j
& 1))
1937 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1938 mask_perm_mask
, stmt
, gsi
);
1942 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
1945 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
1946 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1950 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1952 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1953 == TYPE_VECTOR_SUBPARTS (masktype
));
1954 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
1955 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1957 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
1958 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1964 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
1967 if (!useless_type_conversion_p (vectype
, rettype
))
1969 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1970 == TYPE_VECTOR_SUBPARTS (rettype
));
1971 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
1972 gimple_call_set_lhs (new_stmt
, op
);
1973 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1974 var
= make_ssa_name (vec_dest
);
1975 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
1976 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1980 var
= make_ssa_name (vec_dest
, new_stmt
);
1981 gimple_call_set_lhs (new_stmt
, var
);
1984 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1986 if (modifier
== NARROW
)
1993 var
= permute_vec_elements (prev_res
, var
,
1994 perm_mask
, stmt
, gsi
);
1995 new_stmt
= SSA_NAME_DEF_STMT (var
);
1998 if (prev_stmt_info
== NULL
)
1999 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2001 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2002 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2005 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2007 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2009 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2010 stmt_info
= vinfo_for_stmt (stmt
);
2012 tree lhs
= gimple_call_lhs (stmt
);
2013 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2014 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2015 set_vinfo_for_stmt (stmt
, NULL
);
2016 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2017 gsi_replace (gsi
, new_stmt
, true);
2022 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2023 prev_stmt_info
= NULL
;
2024 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2025 for (i
= 0; i
< ncopies
; i
++)
2027 unsigned align
, misalign
;
2031 tree rhs
= gimple_call_arg (stmt
, 3);
2032 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2033 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2034 /* We should have catched mismatched types earlier. */
2035 gcc_assert (useless_type_conversion_p (vectype
,
2036 TREE_TYPE (vec_rhs
)));
2037 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2038 NULL_TREE
, &dummy
, gsi
,
2039 &ptr_incr
, false, &inv_p
);
2040 gcc_assert (!inv_p
);
2044 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2045 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2046 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2047 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2048 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2049 TYPE_SIZE_UNIT (vectype
));
2052 align
= TYPE_ALIGN_UNIT (vectype
);
2053 if (aligned_access_p (dr
))
2055 else if (DR_MISALIGNMENT (dr
) == -1)
2057 align
= TYPE_ALIGN_UNIT (elem_type
);
2061 misalign
= DR_MISALIGNMENT (dr
);
2062 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2064 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2065 misalign
? misalign
& -misalign
: align
);
2067 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2068 ptr
, vec_mask
, vec_rhs
);
2069 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2071 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2073 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2074 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2079 tree vec_mask
= NULL_TREE
;
2080 prev_stmt_info
= NULL
;
2081 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2082 for (i
= 0; i
< ncopies
; i
++)
2084 unsigned align
, misalign
;
2088 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2089 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2090 NULL_TREE
, &dummy
, gsi
,
2091 &ptr_incr
, false, &inv_p
);
2092 gcc_assert (!inv_p
);
2096 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2097 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2098 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2099 TYPE_SIZE_UNIT (vectype
));
2102 align
= TYPE_ALIGN_UNIT (vectype
);
2103 if (aligned_access_p (dr
))
2105 else if (DR_MISALIGNMENT (dr
) == -1)
2107 align
= TYPE_ALIGN_UNIT (elem_type
);
2111 misalign
= DR_MISALIGNMENT (dr
);
2112 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2114 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2115 misalign
? misalign
& -misalign
: align
);
2117 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2119 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2120 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2122 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2124 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2125 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2131 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2133 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2135 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2136 stmt_info
= vinfo_for_stmt (stmt
);
2138 tree lhs
= gimple_call_lhs (stmt
);
2139 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2140 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2141 set_vinfo_for_stmt (stmt
, NULL
);
2142 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2143 gsi_replace (gsi
, new_stmt
, true);
2149 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2150 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2151 in a single step. On success, store the binary pack code in
2155 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2156 tree_code
*convert_code
)
2158 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2159 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2163 int multi_step_cvt
= 0;
2164 auto_vec
<tree
, 8> interm_types
;
2165 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2166 &code
, &multi_step_cvt
,
2171 *convert_code
= code
;
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

2183 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2190 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2191 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2192 tree vectype_out
, vectype_in
;
2195 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2196 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2197 vec_info
*vinfo
= stmt_info
->vinfo
;
2198 tree fndecl
, new_temp
, rhs_type
;
2200 enum vect_def_type dt
[3]
2201 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2202 gimple
*new_stmt
= NULL
;
2204 vec
<tree
> vargs
= vNULL
;
2205 enum { NARROW
, NONE
, WIDEN
} modifier
;
2209 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2212 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2216 /* Is GS a vectorizable call? */
2217 stmt
= dyn_cast
<gcall
*> (gs
);
2221 if (gimple_call_internal_p (stmt
)
2222 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2223 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2224 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2227 if (gimple_call_lhs (stmt
) == NULL_TREE
2228 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2231 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2233 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2235 /* Process function arguments. */
2236 rhs_type
= NULL_TREE
;
2237 vectype_in
= NULL_TREE
;
2238 nargs
= gimple_call_num_args (stmt
);
2240 /* Bail out if the function has more than three arguments, we do not have
2241 interesting builtin functions to vectorize with more than two arguments
2242 except for fma. No arguments is also not good. */
2243 if (nargs
== 0 || nargs
> 3)
2246 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2247 if (gimple_call_internal_p (stmt
)
2248 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2251 rhs_type
= unsigned_type_node
;
2254 for (i
= 0; i
< nargs
; i
++)
2258 op
= gimple_call_arg (stmt
, i
);
2260 /* We can only handle calls with arguments of the same type. */
2262 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2266 "argument types differ.\n");
2270 rhs_type
= TREE_TYPE (op
);
2272 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2276 "use not simple.\n");
2281 vectype_in
= opvectype
;
2283 && opvectype
!= vectype_in
)
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2287 "argument vector types differ.\n");
2291 /* If all arguments are external or constant defs use a vector type with
2292 the same size as the output vector type. */
2294 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2296 gcc_assert (vectype_in
);
2299 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2302 "no vectype for scalar type ");
2303 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2304 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2311 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2312 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2313 if (nunits_in
== nunits_out
/ 2)
2315 else if (nunits_out
== nunits_in
)
2317 else if (nunits_out
== nunits_in
/ 2)
2322 /* We only handle functions that do not read or clobber memory. */
2323 if (gimple_vuse (stmt
))
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2327 "function reads from or writes to memory.\n");
2331 /* For now, we only vectorize functions if a target specific builtin
2332 is available. TODO -- in some cases, it might be profitable to
2333 insert the calls for pieces of the vector, in order to be able
2334 to vectorize other operations in the loop. */
2336 internal_fn ifn
= IFN_LAST
;
2337 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2338 tree callee
= gimple_call_fndecl (stmt
);
2340 /* First try using an internal function. */
2341 tree_code convert_code
= ERROR_MARK
;
2343 && (modifier
== NONE
2344 || (modifier
== NARROW
2345 && simple_integer_narrowing (vectype_out
, vectype_in
,
2347 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2350 /* If that fails, try asking for a target-specific built-in function. */
2351 if (ifn
== IFN_LAST
)
2353 if (cfn
!= CFN_LAST
)
2354 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2355 (cfn
, vectype_out
, vectype_in
);
2357 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2358 (callee
, vectype_out
, vectype_in
);
2361 if (ifn
== IFN_LAST
&& !fndecl
)
2363 if (cfn
== CFN_GOMP_SIMD_LANE
2366 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2367 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2368 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2369 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2371 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2372 { 0, 1, 2, ... vf - 1 } vector. */
2373 gcc_assert (nargs
== 0);
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2379 "function is not vectorizable.\n");
2386 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2387 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2389 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2391 /* Sanity check: make sure that at least one copy of the vectorized stmt
2392 needs to be generated. */
2393 gcc_assert (ncopies
>= 1);
2395 if (!vec_stmt
) /* transformation not required. */
2397 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2401 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2402 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2403 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2404 vec_promote_demote
, stmt_info
, 0, vect_body
);
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2415 scalar_dest
= gimple_call_lhs (stmt
);
2416 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2418 prev_stmt_info
= NULL
;
2419 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2421 tree prev_res
= NULL_TREE
;
2422 for (j
= 0; j
< ncopies
; ++j
)
2424 /* Build argument list for the vectorized call. */
2426 vargs
.create (nargs
);
2432 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2433 vec
<tree
> vec_oprnds0
;
2435 for (i
= 0; i
< nargs
; i
++)
2436 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2437 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2438 vec_oprnds0
= vec_defs
[0];
2440 /* Arguments are ready. Create the new vector stmt. */
2441 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2444 for (k
= 0; k
< nargs
; k
++)
2446 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2447 vargs
[k
] = vec_oprndsk
[i
];
2449 if (modifier
== NARROW
)
2451 tree half_res
= make_ssa_name (vectype_in
);
2452 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2453 gimple_call_set_lhs (new_stmt
, half_res
);
2454 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2457 prev_res
= half_res
;
2460 new_temp
= make_ssa_name (vec_dest
);
2461 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2462 prev_res
, half_res
);
2466 if (ifn
!= IFN_LAST
)
2467 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2469 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2470 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2471 gimple_call_set_lhs (new_stmt
, new_temp
);
2473 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2474 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2477 for (i
= 0; i
< nargs
; i
++)
2479 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2480 vec_oprndsi
.release ();
2485 for (i
= 0; i
< nargs
; i
++)
2487 op
= gimple_call_arg (stmt
, i
);
2490 = vect_get_vec_def_for_operand (op
, stmt
);
2493 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2495 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2498 vargs
.quick_push (vec_oprnd0
);
2501 if (gimple_call_internal_p (stmt
)
2502 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2504 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2506 for (k
= 0; k
< nunits_out
; ++k
)
2507 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2508 tree cst
= build_vector (vectype_out
, v
);
2510 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2511 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2512 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2513 new_temp
= make_ssa_name (vec_dest
);
2514 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2516 else if (modifier
== NARROW
)
2518 tree half_res
= make_ssa_name (vectype_in
);
2519 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2520 gimple_call_set_lhs (new_stmt
, half_res
);
2521 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2524 prev_res
= half_res
;
2527 new_temp
= make_ssa_name (vec_dest
);
2528 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2529 prev_res
, half_res
);
2533 if (ifn
!= IFN_LAST
)
2534 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2536 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2537 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2538 gimple_call_set_lhs (new_stmt
, new_temp
);
2540 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2542 if (j
== (modifier
== NARROW
? 1 : 0))
2543 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2545 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2547 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2550 else if (modifier
== NARROW
)
2552 for (j
= 0; j
< ncopies
; ++j
)
2554 /* Build argument list for the vectorized call. */
2556 vargs
.create (nargs
* 2);
2562 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2563 vec
<tree
> vec_oprnds0
;
2565 for (i
= 0; i
< nargs
; i
++)
2566 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2567 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2568 vec_oprnds0
= vec_defs
[0];
2570 /* Arguments are ready. Create the new vector stmt. */
2571 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2575 for (k
= 0; k
< nargs
; k
++)
2577 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2578 vargs
.quick_push (vec_oprndsk
[i
]);
2579 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2581 if (ifn
!= IFN_LAST
)
2582 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2584 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2585 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2586 gimple_call_set_lhs (new_stmt
, new_temp
);
2587 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2588 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2591 for (i
= 0; i
< nargs
; i
++)
2593 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2594 vec_oprndsi
.release ();
2599 for (i
= 0; i
< nargs
; i
++)
2601 op
= gimple_call_arg (stmt
, i
);
2605 = vect_get_vec_def_for_operand (op
, stmt
);
2607 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2611 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2613 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2615 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2618 vargs
.quick_push (vec_oprnd0
);
2619 vargs
.quick_push (vec_oprnd1
);
2622 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2623 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2624 gimple_call_set_lhs (new_stmt
, new_temp
);
2625 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2628 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2630 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2632 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2635 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2638 /* No current target implements this case. */
2643 /* The call in STMT might prevent it from being removed in dce.
2644 We however cannot remove it here, due to the way the ssa name
2645 it defines is mapped to the new definition. So just replace
2646 rhs of the statement with something harmless. */
2651 type
= TREE_TYPE (scalar_dest
);
2652 if (is_pattern_stmt_p (stmt_info
))
2653 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2655 lhs
= gimple_call_lhs (stmt
);
2657 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2658 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2659 set_vinfo_for_stmt (stmt
, NULL
);
2660 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2661 gsi_replace (gsi
, new_stmt
, false);
2667 struct simd_call_arg_info
2671 enum vect_def_type dt
;
2672 HOST_WIDE_INT linear_step
;
2674 bool simd_lane_linear
;
2677 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2678 is linear within simd lane (but not within whole loop), note it in
2682 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2683 struct simd_call_arg_info
*arginfo
)
2685 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2687 if (!is_gimple_assign (def_stmt
)
2688 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2689 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2692 tree base
= gimple_assign_rhs1 (def_stmt
);
2693 HOST_WIDE_INT linear_step
= 0;
2694 tree v
= gimple_assign_rhs2 (def_stmt
);
2695 while (TREE_CODE (v
) == SSA_NAME
)
2698 def_stmt
= SSA_NAME_DEF_STMT (v
);
2699 if (is_gimple_assign (def_stmt
))
2700 switch (gimple_assign_rhs_code (def_stmt
))
2703 t
= gimple_assign_rhs2 (def_stmt
);
2704 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2706 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2707 v
= gimple_assign_rhs1 (def_stmt
);
2710 t
= gimple_assign_rhs2 (def_stmt
);
2711 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2713 linear_step
= tree_to_shwi (t
);
2714 v
= gimple_assign_rhs1 (def_stmt
);
2717 t
= gimple_assign_rhs1 (def_stmt
);
2718 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2719 || (TYPE_PRECISION (TREE_TYPE (v
))
2720 < TYPE_PRECISION (TREE_TYPE (t
))))
2729 else if (is_gimple_call (def_stmt
)
2730 && gimple_call_internal_p (def_stmt
)
2731 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
2733 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
2734 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
2739 arginfo
->linear_step
= linear_step
;
2741 arginfo
->simd_lane_linear
= true;
2747 /* Function vectorizable_simd_clone_call.
2749 Check if STMT performs a function call that can be vectorized
2750 by calling a simd clone of the function.
2751 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2752 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2753 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2756 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2757 gimple
**vec_stmt
, slp_tree slp_node
)
2762 tree vec_oprnd0
= NULL_TREE
;
2763 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2765 unsigned int nunits
;
2766 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2767 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2768 vec_info
*vinfo
= stmt_info
->vinfo
;
2769 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2770 tree fndecl
, new_temp
;
2772 gimple
*new_stmt
= NULL
;
2774 auto_vec
<simd_call_arg_info
> arginfo
;
2775 vec
<tree
> vargs
= vNULL
;
2777 tree lhs
, rtype
, ratype
;
2778 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2780 /* Is STMT a vectorizable call? */
2781 if (!is_gimple_call (stmt
))
2784 fndecl
= gimple_call_fndecl (stmt
);
2785 if (fndecl
== NULL_TREE
)
2788 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2789 if (node
== NULL
|| node
->simd_clones
== NULL
)
2792 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2795 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2799 if (gimple_call_lhs (stmt
)
2800 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2803 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2805 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2807 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2814 /* Process function arguments. */
2815 nargs
= gimple_call_num_args (stmt
);
2817 /* Bail out if the function has zero arguments. */
2821 arginfo
.reserve (nargs
, true);
2823 for (i
= 0; i
< nargs
; i
++)
2825 simd_call_arg_info thisarginfo
;
2828 thisarginfo
.linear_step
= 0;
2829 thisarginfo
.align
= 0;
2830 thisarginfo
.op
= NULL_TREE
;
2831 thisarginfo
.simd_lane_linear
= false;
2833 op
= gimple_call_arg (stmt
, i
);
2834 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
2835 &thisarginfo
.vectype
)
2836 || thisarginfo
.dt
== vect_uninitialized_def
)
2838 if (dump_enabled_p ())
2839 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2840 "use not simple.\n");
2844 if (thisarginfo
.dt
== vect_constant_def
2845 || thisarginfo
.dt
== vect_external_def
)
2846 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2848 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2850 /* For linear arguments, the analyze phase should have saved
2851 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2852 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2853 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
2855 gcc_assert (vec_stmt
);
2856 thisarginfo
.linear_step
2857 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
2859 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
2860 thisarginfo
.simd_lane_linear
2861 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
2862 == boolean_true_node
);
2863 /* If loop has been peeled for alignment, we need to adjust it. */
2864 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2865 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2866 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
2868 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2869 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
2870 tree opt
= TREE_TYPE (thisarginfo
.op
);
2871 bias
= fold_convert (TREE_TYPE (step
), bias
);
2872 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2874 = fold_build2 (POINTER_TYPE_P (opt
)
2875 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2876 thisarginfo
.op
, bias
);
2880 && thisarginfo
.dt
!= vect_constant_def
2881 && thisarginfo
.dt
!= vect_external_def
2883 && TREE_CODE (op
) == SSA_NAME
2884 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2886 && tree_fits_shwi_p (iv
.step
))
2888 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2889 thisarginfo
.op
= iv
.base
;
2891 else if ((thisarginfo
.dt
== vect_constant_def
2892 || thisarginfo
.dt
== vect_external_def
)
2893 && POINTER_TYPE_P (TREE_TYPE (op
)))
2894 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2895 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2897 if (POINTER_TYPE_P (TREE_TYPE (op
))
2898 && !thisarginfo
.linear_step
2900 && thisarginfo
.dt
!= vect_constant_def
2901 && thisarginfo
.dt
!= vect_external_def
2904 && TREE_CODE (op
) == SSA_NAME
)
2905 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
2907 arginfo
.quick_push (thisarginfo
);
2910 unsigned int badness
= 0;
2911 struct cgraph_node
*bestn
= NULL
;
2912 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2913 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2915 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2916 n
= n
->simdclone
->next_clone
)
2918 unsigned int this_badness
= 0;
2919 if (n
->simdclone
->simdlen
2920 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2921 || n
->simdclone
->nargs
!= nargs
)
2923 if (n
->simdclone
->simdlen
2924 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2925 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2926 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2927 if (n
->simdclone
->inbranch
)
2928 this_badness
+= 2048;
2929 int target_badness
= targetm
.simd_clone
.usable (n
);
2930 if (target_badness
< 0)
2932 this_badness
+= target_badness
* 512;
2933 /* FORNOW: Have to add code to add the mask argument. */
2934 if (n
->simdclone
->inbranch
)
2936 for (i
= 0; i
< nargs
; i
++)
2938 switch (n
->simdclone
->args
[i
].arg_type
)
2940 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2941 if (!useless_type_conversion_p
2942 (n
->simdclone
->args
[i
].orig_type
,
2943 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2945 else if (arginfo
[i
].dt
== vect_constant_def
2946 || arginfo
[i
].dt
== vect_external_def
2947 || arginfo
[i
].linear_step
)
2950 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2951 if (arginfo
[i
].dt
!= vect_constant_def
2952 && arginfo
[i
].dt
!= vect_external_def
)
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
2957 if (arginfo
[i
].dt
== vect_constant_def
2958 || arginfo
[i
].dt
== vect_external_def
2959 || (arginfo
[i
].linear_step
2960 != n
->simdclone
->args
[i
].linear_step
))
2963 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2964 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
2965 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
2966 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
2967 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
2968 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
2972 case SIMD_CLONE_ARG_TYPE_MASK
:
2975 if (i
== (size_t) -1)
2977 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2982 if (arginfo
[i
].align
)
2983 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2984 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2986 if (i
== (size_t) -1)
2988 if (bestn
== NULL
|| this_badness
< badness
)
2991 badness
= this_badness
;
2998 for (i
= 0; i
< nargs
; i
++)
2999 if ((arginfo
[i
].dt
== vect_constant_def
3000 || arginfo
[i
].dt
== vect_external_def
)
3001 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3004 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3006 if (arginfo
[i
].vectype
== NULL
3007 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3008 > bestn
->simdclone
->simdlen
))
3012 fndecl
= bestn
->decl
;
3013 nunits
= bestn
->simdclone
->simdlen
;
3014 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3016 /* If the function isn't const, only allow it in simd loops where user
3017 has asserted that at least nunits consecutive iterations can be
3018 performed using SIMD instructions. */
3019 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3020 && gimple_vuse (stmt
))
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies
>= 1);
3027 if (!vec_stmt
) /* transformation not required. */
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3030 for (i
= 0; i
< nargs
; i
++)
3031 if ((bestn
->simdclone
->args
[i
].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3033 || (bestn
->simdclone
->args
[i
].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3039 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3040 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3041 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3043 tree sll
= arginfo
[i
].simd_lane_linear
3044 ? boolean_true_node
: boolean_false_node
;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3047 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE
, vect_location
,
3050 "=== vectorizable_simd_clone_call ===\n");
3051 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3057 if (dump_enabled_p ())
3058 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3061 scalar_dest
= gimple_call_lhs (stmt
);
3062 vec_dest
= NULL_TREE
;
3067 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3068 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3069 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3072 rtype
= TREE_TYPE (ratype
);
3076 prev_stmt_info
= NULL
;
3077 for (j
= 0; j
< ncopies
; ++j
)
3079 /* Build argument list for the vectorized call. */
3081 vargs
.create (nargs
);
3085 for (i
= 0; i
< nargs
; i
++)
3087 unsigned int k
, l
, m
, o
;
3089 op
= gimple_call_arg (stmt
, i
);
3090 switch (bestn
->simdclone
->args
[i
].arg_type
)
3092 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3093 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3094 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3095 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3097 if (TYPE_VECTOR_SUBPARTS (atype
)
3098 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3100 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3101 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3102 / TYPE_VECTOR_SUBPARTS (atype
));
3103 gcc_assert ((k
& (k
- 1)) == 0);
3106 = vect_get_vec_def_for_operand (op
, stmt
);
3109 vec_oprnd0
= arginfo
[i
].op
;
3110 if ((m
& (k
- 1)) == 0)
3112 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3115 arginfo
[i
].op
= vec_oprnd0
;
3117 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3119 bitsize_int ((m
& (k
- 1)) * prec
));
3121 = gimple_build_assign (make_ssa_name (atype
),
3123 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3124 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3128 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3129 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3130 gcc_assert ((k
& (k
- 1)) == 0);
3131 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3133 vec_alloc (ctor_elts
, k
);
3136 for (l
= 0; l
< k
; l
++)
3138 if (m
== 0 && l
== 0)
3140 = vect_get_vec_def_for_operand (op
, stmt
);
3143 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3145 arginfo
[i
].op
= vec_oprnd0
;
3148 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3152 vargs
.safe_push (vec_oprnd0
);
3155 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3157 = gimple_build_assign (make_ssa_name (atype
),
3159 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3160 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3165 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3166 vargs
.safe_push (op
);
3168 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3169 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3174 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3179 edge pe
= loop_preheader_edge (loop
);
3180 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3181 gcc_assert (!new_bb
);
3183 if (arginfo
[i
].simd_lane_linear
)
3185 vargs
.safe_push (arginfo
[i
].op
);
3188 tree phi_res
= copy_ssa_name (op
);
3189 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3190 set_vinfo_for_stmt (new_phi
,
3191 new_stmt_vec_info (new_phi
, loop_vinfo
));
3192 add_phi_arg (new_phi
, arginfo
[i
].op
,
3193 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3195 = POINTER_TYPE_P (TREE_TYPE (op
))
3196 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3197 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3198 ? sizetype
: TREE_TYPE (op
);
3200 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3202 tree tcst
= wide_int_to_tree (type
, cst
);
3203 tree phi_arg
= copy_ssa_name (op
);
3205 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3206 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3207 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3208 set_vinfo_for_stmt (new_stmt
,
3209 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3210 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3212 arginfo
[i
].op
= phi_res
;
3213 vargs
.safe_push (phi_res
);
3218 = POINTER_TYPE_P (TREE_TYPE (op
))
3219 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3220 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3221 ? sizetype
: TREE_TYPE (op
);
3223 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3225 tree tcst
= wide_int_to_tree (type
, cst
);
3226 new_temp
= make_ssa_name (TREE_TYPE (op
));
3227 new_stmt
= gimple_build_assign (new_temp
, code
,
3228 arginfo
[i
].op
, tcst
);
3229 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3230 vargs
.safe_push (new_temp
);
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3244 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3247 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3249 new_temp
= create_tmp_var (ratype
);
3250 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3251 == TYPE_VECTOR_SUBPARTS (rtype
))
3252 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3254 new_temp
= make_ssa_name (rtype
, new_stmt
);
3255 gimple_call_set_lhs (new_stmt
, new_temp
);
3257 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3261 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3264 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3265 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3266 gcc_assert ((k
& (k
- 1)) == 0);
3267 for (l
= 0; l
< k
; l
++)
3272 t
= build_fold_addr_expr (new_temp
);
3273 t
= build2 (MEM_REF
, vectype
, t
,
3274 build_int_cst (TREE_TYPE (t
),
3275 l
* prec
/ BITS_PER_UNIT
));
3278 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3279 size_int (prec
), bitsize_int (l
* prec
));
3281 = gimple_build_assign (make_ssa_name (vectype
), t
);
3282 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3283 if (j
== 0 && l
== 0)
3284 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3286 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3288 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3293 tree clobber
= build_constructor (ratype
, NULL
);
3294 TREE_THIS_VOLATILE (clobber
) = 1;
3295 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3296 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3300 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3302 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3303 / TYPE_VECTOR_SUBPARTS (rtype
));
3304 gcc_assert ((k
& (k
- 1)) == 0);
3305 if ((j
& (k
- 1)) == 0)
3306 vec_alloc (ret_ctor_elts
, k
);
3309 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3310 for (m
= 0; m
< o
; m
++)
3312 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3313 size_int (m
), NULL_TREE
, NULL_TREE
);
3315 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3316 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3318 gimple_assign_lhs (new_stmt
));
3320 tree clobber
= build_constructor (ratype
, NULL
);
3321 TREE_THIS_VOLATILE (clobber
) = 1;
3322 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3323 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3326 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3327 if ((j
& (k
- 1)) != k
- 1)
3329 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3331 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3332 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3334 if ((unsigned) j
== k
- 1)
3335 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3337 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3339 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3344 tree t
= build_fold_addr_expr (new_temp
);
3345 t
= build2 (MEM_REF
, vectype
, t
,
3346 build_int_cst (TREE_TYPE (t
), 0));
3348 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3349 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3350 tree clobber
= build_constructor (ratype
, NULL
);
3351 TREE_THIS_VOLATILE (clobber
) = 1;
3352 vect_finish_stmt_generation (stmt
,
3353 gimple_build_assign (new_temp
,
3359 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3361 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3363 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3368 /* The call in STMT might prevent it from being removed in dce.
3369 We however cannot remove it here, due to the way the ssa name
3370 it defines is mapped to the new definition. So just replace
3371 rhs of the statement with something harmless. */
3378 type
= TREE_TYPE (scalar_dest
);
3379 if (is_pattern_stmt_p (stmt_info
))
3380 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3382 lhs
= gimple_call_lhs (stmt
);
3383 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3386 new_stmt
= gimple_build_nop ();
3387 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3388 set_vinfo_for_stmt (stmt
, NULL
);
3389 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3390 gsi_replace (gsi
, new_stmt
, true);
3391 unlink_stmt_vdef (stmt
);
3397 /* Function vect_gen_widened_results_half
3399 Create a vector stmt whose code, type, number of arguments, and result
3400 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3401 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3402 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3403 needs to be created (DECL is a function-decl of a target-builtin).
3404 STMT is the original scalar stmt that we are vectorizing. */
/* Emit one half of a widened result.  When CODE is CALL_EXPR a call to DECL
   is built, otherwise an assignment with operation CODE.  In both cases the
   result is a new SSA name created from VEC_DEST, and the statement is then
   passed to vect_finish_stmt_generation together with STMT and GSI.
   NOTE(review): the return-type line, the DECL and STMT parameter lines, the
   local declarations of NEW_STMT and NEW_TEMP and any return statement are
   not present in this listing (the embedded numbering skips them).  The
   caller in vect_create_vectorized_promotion_stmts passes eight arguments
   and stores the result as a statement.  */
3407 vect_gen_widened_results_half (enum tree_code code
,
3409 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3410 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3416 /* Generate half of the widened result: */
3417 if (code
== CALL_EXPR
)
3419 /* Target specific support */
3420 if (op_type
== binary_op
)
3421 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
/* Single-operand form of the call.  NOTE(review): embedded line 3422, which
   separates the two calls (presumably an else), is missing here.  */
3423 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
/* The call result becomes a fresh SSA name based on VEC_DEST.  */
3424 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3425 gimple_call_set_lhs (new_stmt
, new_temp
);
3429 /* Generic support */
/* OP_TYPE has to agree with the operand count of CODE.  */
3430 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3431 if (op_type
!= binary_op
)
/* NOTE(review): embedded line 3432, the statement guarded by the test
   above, is missing from this listing.  The assignment below always
   passes both operands.  */
3433 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3434 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3435 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Common to both paths: hand the finished statement over with STMT and
   GSI.  */
3437 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3443 /* Get vectorized definitions for loop-based vectorization. For the first
3444 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3445 scalar operand), and for the rest we get a copy with
3446 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3447 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3448 The vectors are collected into VEC_OPRNDS. */
/* Push two vector definitions onto VEC_OPRNDS per invocation and call
   itself again with MULTI_STEP_CVT - 1.  The first definition comes from
   vect_get_vec_def_for_operand when *OPRND does not have a vector type and
   from vect_get_vec_def_for_stmt_copy otherwise; the second is a stmt copy
   of the first.
   NOTE(review): the return-type line, the declaration of VEC_OPRND, the
   else line, the end of two comments and the condition guarding the
   recursive call are missing from this listing, so how the recursion
   terminates and whether *OPRND is updated cannot be read off here.  */
3451 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3452 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3456 /* Get first vector operand. */
3457 /* All the vector operands except the very first one (that is scalar oprnd)
3459 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3460 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3462 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3464 vec_oprnds
->quick_push (vec_oprnd
);
3466 /* Get second vector operand. */
3467 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
/* Record the copy as the second definition of this step.  */
3468 vec_oprnds
->quick_push (vec_oprnd
);
3472 /* For conversion in multiple steps, continue to get operands
3475 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3479 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3480 For multi-step conversions store the resulting vectors and call the function
recursively. */
/* Combine consecutive pairs of VEC_OPRNDS with operation CODE, using the
   destination popped from VEC_DSTS.  Each result is either written back
   into VEC_OPRNDS at half the index for a further step, or recorded in
   SLP_NODE or in the vector-stmt chain of STMT.  The function then
   truncates VEC_OPRNDS, calls itself with VEC_PACK_TRUNC_EXPR and
   MULTI_STEP_CVT - 1, and finally pushes the destination back onto
   VEC_DSTS.
   NOTE(review): the return-type line, the VEC_DSTS parameter line, the
   declarations of I and NEW_STMT, and the conditions selecting between
   these paths are missing from this listing.  */
3484 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3485 int multi_step_cvt
, gimple
*stmt
,
3487 gimple_stmt_iterator
*gsi
,
3488 slp_tree slp_node
, enum tree_code code
,
3489 stmt_vec_info
*prev_stmt_info
)
3492 tree vop0
, vop1
, new_tmp
, vec_dest
;
3494 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Take the destination variable for this step from VEC_DSTS.  */
3496 vec_dest
= vec_dsts
.pop ();
/* Operands are consumed two at a time.  */
3498 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3500 /* Create demotion operation. */
3501 vop0
= (*vec_oprnds
)[i
];
3502 vop1
= (*vec_oprnds
)[i
+ 1];
3503 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
/* Give the statement a fresh SSA result and emit it.  */
3504 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3505 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3506 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3509 /* Store the resulting vector for next recursive call. */
3510 (*vec_oprnds
)[i
/2] = new_tmp
;
3513 /* This is the last step of the conversion sequence. Store the
3514 vectors in SLP_NODE or in vector info of the scalar statement
3515 (or in STMT_VINFO_RELATED_STMT chain). */
3517 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* With no previous statement NEW_STMT is stored as the vector stmt of
   STMT; the assignment after that links it behind the previous statement
   (presumably the else arm, whose keyword line is missing).  */
3520 if (!*prev_stmt_info
)
3521 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3523 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3525 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3530 /* For multi-step demotion operations we first generate demotion operations
3531 from the source type to the intermediate types, and then combine the
3532 results (stored in VEC_OPRNDS) in demotion operation to the destination
3536 /* At each level of recursion we have half of the operands we had at the
3538 vec_oprnds
->truncate ((i
+1)/2);
3539 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3540 stmt
, vec_dsts
, gsi
, slp_node
,
3541 VEC_PACK_TRUNC_EXPR
,
3545 vec_dsts
.quick_push (vec_dest
);
3549 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3550 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3551 the resulting vectors and call the function recursively. */
/* For every operand in VEC_OPRNDS0 generate two statements through
   vect_gen_widened_results_half, one with CODE1 and DECL1 and one with
   CODE2 and DECL2, and collect both results.  Afterwards *VEC_OPRNDS0 is
   released and replaced by the vector of results, which holds two entries
   per original operand.  For binary operations the second operand is taken
   from VEC_OPRNDS1 at the same index.
   NOTE(review): the return-type line, the declaration of I and several
   brace and else lines are missing from this listing.  */
3554 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3555 vec
<tree
> *vec_oprnds1
,
3556 gimple
*stmt
, tree vec_dest
,
3557 gimple_stmt_iterator
*gsi
,
3558 enum tree_code code1
,
3559 enum tree_code code2
, tree decl1
,
3560 tree decl2
, int op_type
)
3563 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3564 gimple
*new_stmt1
, *new_stmt2
;
3565 vec
<tree
> vec_tmp
= vNULL
;
/* Room for two results per input operand.  */
3567 vec_tmp
.create (vec_oprnds0
->length () * 2);
3568 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3570 if (op_type
== binary_op
)
3571 vop1
= (*vec_oprnds1
)[i
];
/* NOTE(review): embedded lines 3572-3574 are absent, so the value VOP1
   takes for non-binary operations is not visible here.  */
3575 /* Generate the two halves of promotion operation. */
3576 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3577 op_type
, vec_dest
, gsi
, stmt
);
3578 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3579 op_type
, vec_dest
, gsi
, stmt
);
/* Fetch the result names; calls and assignments are queried through
   different accessors.  */
3580 if (is_gimple_call (new_stmt1
))
3582 new_tmp1
= gimple_call_lhs (new_stmt1
);
3583 new_tmp2
= gimple_call_lhs (new_stmt2
);
3587 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3588 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3591 /* Store the results for the next step. */
3592 vec_tmp
.quick_push (new_tmp1
);
3593 vec_tmp
.quick_push (new_tmp2
);
/* Replace the input operands by the collected results.  */
3596 vec_oprnds0
->release ();
3597 *vec_oprnds0
= vec_tmp
;
3601 /* Check if STMT performs a conversion operation, that can be vectorized.
3602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Handles CONVERT_EXPR codes, FIX_TRUNC_EXPR, FLOAT_EXPR, WIDEN_MULT_EXPR
   and WIDEN_LSHIFT_EXPR.  The function first checks the statement, its
   scalar types and operands, then compares the element counts of the input
   and output vector types and queries the target through the
   supportable_*_operation helpers.  With VEC_STMT null only this analysis
   runs and the statement kind and cost are recorded.  Otherwise the vector
   statements are generated, with separate code for the same-size, widening
   and narrowing cases, and the temporary vectors are released at the end.
   NOTE(review): this listing is missing many original lines (return type,
   several declarations, braces, else and return statements, the
   assignments to MODIFIER and the construct that dispatches on it), and
   some comments lost their terminator.  Comments added here describe only
   what the visible lines show.  */
3607 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3608 gimple
**vec_stmt
, slp_tree slp_node
)
3612 tree op0
, op1
= NULL_TREE
;
3613 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3614 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3615 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3616 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3617 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3618 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3621 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3622 gimple
*new_stmt
= NULL
;
3623 stmt_vec_info prev_stmt_info
;
3626 tree vectype_out
, vectype_in
;
3628 tree lhs_type
, rhs_type
;
3629 enum { NARROW
, NONE
, WIDEN
} modifier
;
3630 vec
<tree
> vec_oprnds0
= vNULL
;
3631 vec
<tree
> vec_oprnds1
= vNULL
;
3633 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3634 vec_info
*vinfo
= stmt_info
->vinfo
;
3635 int multi_step_cvt
= 0;
3636 vec
<tree
> vec_dsts
= vNULL
;
3637 vec
<tree
> interm_types
= vNULL
;
3638 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3640 machine_mode rhs_mode
;
3641 unsigned short fltsz
;
3643 /* Is STMT a vectorizable conversion? */
3645 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3648 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3652 if (!is_gimple_assign (stmt
))
3655 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only conversion codes and the two widening codes pass this test.  */
3658 code
= gimple_assign_rhs_code (stmt
);
3659 if (!CONVERT_EXPR_CODE_P (code
)
3660 && code
!= FIX_TRUNC_EXPR
3661 && code
!= FLOAT_EXPR
3662 && code
!= WIDEN_MULT_EXPR
3663 && code
!= WIDEN_LSHIFT_EXPR
)
3666 op_type
= TREE_CODE_LENGTH (code
);
3668 /* Check types of lhs and rhs. */
3669 scalar_dest
= gimple_assign_lhs (stmt
);
3670 lhs_type
= TREE_TYPE (scalar_dest
);
3671 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3673 op0
= gimple_assign_rhs1 (stmt
);
3674 rhs_type
= TREE_TYPE (op0
);
/* Apart from FIX_TRUNC_EXPR and FLOAT_EXPR, both sides must be integral
   or both must be scalar floating-point types.  */
3676 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3677 && !((INTEGRAL_TYPE_P (lhs_type
)
3678 && INTEGRAL_TYPE_P (rhs_type
))
3679 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3680 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
/* Unless the output is a boolean vector, an integral type whose precision
   differs from the precision of its mode triggers the diagnostic below.  */
3683 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3684 && ((INTEGRAL_TYPE_P (lhs_type
)
3685 && (TYPE_PRECISION (lhs_type
)
3686 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3687 || (INTEGRAL_TYPE_P (rhs_type
)
3688 && (TYPE_PRECISION (rhs_type
)
3689 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3693 "type conversion to/from bit-precision unsupported."
3698 /* Check the operands of the operation. */
3699 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3701 if (dump_enabled_p ())
3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3703 "use not simple.\n");
/* Binary codes: the second operand is checked as well.  */
3706 if (op_type
== binary_op
)
3710 op1
= gimple_assign_rhs2 (stmt
);
3711 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3712 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3714 if (CONSTANT_CLASS_P (op0
))
3715 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
3717 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3723 "use not simple.\n");
3728 /* If op0 is an external or constant defs use a vector type of
3729 the same size as the output vector type. */
3731 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3733 gcc_assert (vectype_in
);
3736 if (dump_enabled_p ())
3738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3739 "no vectype for scalar type ");
3740 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3741 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* A boolean output vector combined with a non-boolean input vector is
   reported as unsupported.  */
3747 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3748 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3750 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3753 "can't convert between boolean and non "
3755 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3756 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Compare the element counts of the input and output vector types.
   NOTE(review): the assignments to MODIFIER that presumably follow each
   comparison are missing from this listing.  */
3762 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3763 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3764 if (nunits_in
< nunits_out
)
3766 else if (nunits_out
== nunits_in
)
3771 /* Multiple types in SLP are handled by creating the appropriate number of
3772 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3776 else if (modifier
== NARROW
)
3777 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3779 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3781 /* Sanity check: make sure that at least one copy of the vectorized stmt
3782 needs to be generated. */
3783 gcc_assert (ncopies
>= 1);
3785 /* Supportable by target? */
3789 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3791 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3798 "conversion not supported by target.\n");
3802 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3803 &code1
, &code2
, &multi_step_cvt
,
3806 /* Binary widening operation can only be supported directly by the
3808 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3812 if (code
!= FLOAT_EXPR
3813 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3814 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3817 rhs_mode
= TYPE_MODE (rhs_type
);
3818 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3819 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3820 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3821 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3824 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3825 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3826 if (cvt_type
== NULL_TREE
)
3829 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3831 if (!supportable_convert_operation (code
, vectype_out
,
3832 cvt_type
, &decl1
, &codecvt1
))
3835 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3836 cvt_type
, &codecvt1
,
3837 &codecvt2
, &multi_step_cvt
,
3841 gcc_assert (multi_step_cvt
== 0);
3843 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3844 vectype_in
, &code1
, &code2
,
3845 &multi_step_cvt
, &interm_types
))
3849 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3852 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3853 codecvt2
= ERROR_MARK
;
3857 interm_types
.safe_push (cvt_type
);
3858 cvt_type
= NULL_TREE
;
3863 gcc_assert (op_type
== unary_op
);
3864 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3865 &code1
, &multi_step_cvt
,
3869 if (code
!= FIX_TRUNC_EXPR
3870 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3871 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3874 rhs_mode
= TYPE_MODE (rhs_type
);
3876 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3877 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3878 if (cvt_type
== NULL_TREE
)
3880 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3883 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3884 &code1
, &multi_step_cvt
,
3893 if (!vec_stmt
) /* transformation not required. */
3895 if (dump_enabled_p ())
3896 dump_printf_loc (MSG_NOTE
, vect_location
,
3897 "=== vectorizable_conversion ===\n");
/* Analysis only: record the statement kind and model its cost.
   FIX_TRUNC_EXPR and FLOAT_EXPR are recorded as type conversions with the
   simple cost model, the NARROW case as a demotion, and the remaining
   assignment as a promotion (its else line is missing here).  */
3898 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3900 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3901 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3903 else if (modifier
== NARROW
)
3905 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3906 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3910 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3911 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3913 interm_types
.release ();
3918 if (dump_enabled_p ())
3919 dump_printf_loc (MSG_NOTE
, vect_location
,
3920 "transform conversion. ncopies = %d.\n", ncopies
);
/* For binary operations a constant operand is converted to the type of
   the other operand.  */
3922 if (op_type
== binary_op
)
3924 if (CONSTANT_CLASS_P (op0
))
3925 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3926 else if (CONSTANT_CLASS_P (op1
))
3927 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3930 /* In case of multi-step conversion, we first generate conversion operations
3931 to the intermediate types, and then from that types to the final one.
3932 We create vector destinations for the intermediate type (TYPES) received
3933 from supportable_*_operation, and store them in the correct order
3934 for future use in vect_create_vectorized_*_stmts (). */
3935 vec_dsts
.create (multi_step_cvt
+ 1);
3936 vec_dest
= vect_create_destination_var (scalar_dest
,
3937 (cvt_type
&& modifier
== WIDEN
)
3938 ? cvt_type
: vectype_out
);
3939 vec_dsts
.quick_push (vec_dest
);
3943 for (i
= interm_types
.length () - 1;
3944 interm_types
.iterate (i
, &intermediate_type
); i
--)
3946 vec_dest
= vect_create_destination_var (scalar_dest
,
3948 vec_dsts
.quick_push (vec_dest
);
3953 vec_dest
= vect_create_destination_var (scalar_dest
,
3955 ? vectype_out
: cvt_type
);
/* Size the operand vectors: for WIDEN and NARROW the capacity depends on
   vect_pow2 (MULTI_STEP_CVT) when MULTI_STEP_CVT is nonzero.  */
3959 if (modifier
== WIDEN
)
3961 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3962 if (op_type
== binary_op
)
3963 vec_oprnds1
.create (1);
3965 else if (modifier
== NARROW
)
3966 vec_oprnds0
.create (
3967 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3969 else if (code
== WIDEN_LSHIFT_EXPR
)
3970 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3973 prev_stmt_info
= NULL
;
/* First generation loop: for every copy, one statement per operand in
   VEC_OPRNDS0, either a call to DECL1 when CODE1 is CALL_EXPR or a unary
   assignment with CODE1.  NOTE(review): the line selecting this loop for a
   particular MODIFIER value is missing from this listing.  */
3977 for (j
= 0; j
< ncopies
; j
++)
3980 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3983 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3985 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3987 /* Arguments are ready, create the new vector stmt. */
3988 if (code1
== CALL_EXPR
)
3990 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3991 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3992 gimple_call_set_lhs (new_stmt
, new_temp
);
3996 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3997 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3998 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3999 gimple_assign_set_lhs (new_stmt
, new_temp
);
4002 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4004 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4007 if (!prev_stmt_info
)
4008 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4011 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4018 /* In case the vectorization factor (VF) is bigger than the number
4019 of elements that we can fit in a vectype (nunits), we have to
4020 generate more than one vector stmt - i.e - we need to "unroll"
4021 the vector stmt by a factor VF/nunits. */
4022 for (j
= 0; j
< ncopies
; j
++)
4029 if (code
== WIDEN_LSHIFT_EXPR
)
4034 /* Store vec_oprnd1 for every vector stmt to be created
4035 for SLP_NODE. We check during the analysis that all
4036 the shift arguments are the same. */
4037 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4038 vec_oprnds1
.quick_push (vec_oprnd1
);
4040 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4044 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4045 &vec_oprnds1
, slp_node
, -1);
4049 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4050 vec_oprnds0
.quick_push (vec_oprnd0
);
4051 if (op_type
== binary_op
)
4053 if (code
== WIDEN_LSHIFT_EXPR
)
4056 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4057 vec_oprnds1
.quick_push (vec_oprnd1
);
4063 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4064 vec_oprnds0
.truncate (0);
4065 vec_oprnds0
.quick_push (vec_oprnd0
);
4066 if (op_type
== binary_op
)
4068 if (code
== WIDEN_LSHIFT_EXPR
)
4071 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4073 vec_oprnds1
.truncate (0);
4074 vec_oprnds1
.quick_push (vec_oprnd1
);
4078 /* Arguments are ready. Create the new vector stmts. */
/* The steps run from index MULTI_STEP_CVT down to 0, each taking its
   destination from VEC_DSTS.  */
4079 for (i
= multi_step_cvt
; i
>= 0; i
--)
4081 tree this_dest
= vec_dsts
[i
];
4082 enum tree_code c1
= code1
, c2
= code2
;
4083 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4088 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4090 stmt
, this_dest
, gsi
,
4091 c1
, c2
, decl1
, decl2
,
/* Each promoted operand is either passed through CODECVT1 (a call to
   DECL1 or a unary assignment) or its defining statement is used as is.
   NOTE(review): the condition choosing between the two is missing from
   this listing.  The statement is then recorded in SLP_NODE or in the
   chain of STMT.  */
4095 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4099 if (codecvt1
== CALL_EXPR
)
4101 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4102 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4103 gimple_call_set_lhs (new_stmt
, new_temp
);
4107 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4108 new_temp
= make_ssa_name (vec_dest
);
4109 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4113 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4116 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4119 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4122 if (!prev_stmt_info
)
4123 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4125 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4126 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4131 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4135 /* In case the vectorization factor (VF) is bigger than the number
4136 of elements that we can fit in a vectype (nunits), we have to
4137 generate more than one vector stmt - i.e - we need to "unroll"
4138 the vector stmt by a factor VF/nunits. */
4139 for (j
= 0; j
< ncopies
; j
++)
4143 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4147 vec_oprnds0
.truncate (0);
4148 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4149 vect_pow2 (multi_step_cvt
) - 1);
4152 /* Arguments are ready. Create the new vector stmts. */
4154 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4156 if (codecvt1
== CALL_EXPR
)
4158 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4159 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4160 gimple_call_set_lhs (new_stmt
, new_temp
);
4164 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4165 new_temp
= make_ssa_name (vec_dest
);
4166 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4170 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4171 vec_oprnds0
[i
] = new_temp
;
4174 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4175 stmt
, vec_dsts
, gsi
,
4180 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Release the temporary vectors used during the transformation.  */
4184 vec_oprnds0
.release ();
4185 vec_oprnds1
.release ();
4186 vec_dsts
.release ();
4187 interm_types
.release ();
4193 /* Function vectorizable_assignment.
4195 Check if STMT performs an assignment (copy) that can be vectorized.
4196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4197 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Handles single-rhs assignments, PAREN_EXPR and conversion codes; for
   VIEW_CONVERT_EXPR the wrapped operand is used.  Conversions are only
   accepted when element count and vector mode size are unchanged, and
   bit-precision changes are diagnosed.  With VEC_STMT null the statement
   kind is set to assignment_vec_info_type and the cost is modelled.
   Otherwise one vector assignment per operand is generated for each of the
   NCOPIES copies, wrapping the operand in a VIEW_CONVERT_EXPR to VECTYPE
   for conversions.
   NOTE(review): this listing is missing many original lines (return type,
   several declarations, braces, else and return statements), and two
   comments lost their terminator.  Comments added here describe only what
   the visible lines show.  */
4201 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4202 gimple
**vec_stmt
, slp_tree slp_node
)
4207 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4211 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4214 vec
<tree
> vec_oprnds
= vNULL
;
4216 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4217 vec_info
*vinfo
= stmt_info
->vinfo
;
4218 gimple
*new_stmt
= NULL
;
4219 stmt_vec_info prev_stmt_info
= NULL
;
4220 enum tree_code code
;
/* Guard for statements that are not relevant when there is no bb_vinfo.
   The action taken is missing from this listing.  */
4223 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4226 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4230 /* Is vectorizable assignment? */
4231 if (!is_gimple_assign (stmt
))
4234 scalar_dest
= gimple_assign_lhs (stmt
);
4235 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Determine the operand being copied.  */
4238 code
= gimple_assign_rhs_code (stmt
);
4239 if (gimple_assign_single_p (stmt
)
4240 || code
== PAREN_EXPR
4241 || CONVERT_EXPR_CODE_P (code
))
4242 op
= gimple_assign_rhs1 (stmt
);
/* For VIEW_CONVERT_EXPR the copied value is operand 0 of the wrapper.  */
4246 if (code
== VIEW_CONVERT_EXPR
)
4247 op
= TREE_OPERAND (op
, 0);
/* Element count of the result vector type.  */
4249 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4250 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4252 /* Multiple types in SLP are handled by creating the appropriate number of
4253 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4258 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4260 gcc_assert (ncopies
>= 1);
4262 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4264 if (dump_enabled_p ())
4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4266 "use not simple.\n");
4270 /* We can handle NOP_EXPR conversions that do not change the number
4271 of elements or the vector size. */
4272 if ((CONVERT_EXPR_CODE_P (code
)
4273 || code
== VIEW_CONVERT_EXPR
)
4275 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4276 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4277 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4280 /* We do not handle bit-precision changes. */
4281 if ((CONVERT_EXPR_CODE_P (code
)
4282 || code
== VIEW_CONVERT_EXPR
)
4283 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4284 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4285 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4286 || ((TYPE_PRECISION (TREE_TYPE (op
))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4288 /* But a conversion that does not change the bit-pattern is ok. */
4289 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4290 > TYPE_PRECISION (TREE_TYPE (op
)))
4291 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4292 /* Conversion between boolean types of different sizes is
4293 a simple assignment in case their vectypes are same
4295 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4296 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4300 "type conversion to/from bit-precision "
4305 if (!vec_stmt
) /* transformation not required. */
/* Analysis only: record the statement kind and model its cost.  */
4307 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE
, vect_location
,
4310 "=== vectorizable_assignment ===\n");
4311 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
/* Destination variable for the vector results.  */
4320 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* One round per copy.  The operand definitions come from
   vect_get_vec_defs or from vect_get_vec_defs_for_stmt_copy.
   NOTE(review): the condition choosing between the two calls is missing
   from this listing.  */
4323 for (j
= 0; j
< ncopies
; j
++)
4327 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4329 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4331 /* Arguments are ready. create the new vector stmt. */
4332 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* Conversions are expressed as a VIEW_CONVERT_EXPR of the operand to
   VECTYPE.  */
4334 if (CONVERT_EXPR_CODE_P (code
)
4335 || code
== VIEW_CONVERT_EXPR
)
4336 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4337 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4338 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4339 gimple_assign_set_lhs (new_stmt
, new_temp
);
4340 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4342 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Record the statement: either as the vector stmt of STMT and in
   *VEC_STMT, or linked behind the previous statement.  NOTE(review): the
   conditions selecting between these assignments are missing from this
   listing.  */
4349 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4353 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the operand vector.  */
4356 vec_oprnds
.release ();
4361 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4362 either as shift by a scalar or by a vector. */
/* Look up the vector type for SCALAR_TYPE and query the optab for CODE,
   first with optab_scalar and then with optab_vector, comparing the
   handler for the vector mode against CODE_FOR_nothing.
   NOTE(review): the return-type line, the declarations of VECTYPE, OPTAB
   and ICODE, the first half of both conditions and every return statement
   are missing from this listing, so the result produced on each path
   cannot be read off here.  */
4365 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4368 machine_mode vec_mode
;
4373 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Query the optab_scalar form first.  */
4377 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4379 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* Then the optab_vector form, presumably only when the scalar form has no
   handler.  */
4381 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4383 || (optab_handler (optab
, TYPE_MODE (vectype
))
4384 == CODE_FOR_nothing
))
/* Check the handler of the selected optab for the mode of VECTYPE.  */
4388 vec_mode
= TYPE_MODE (vectype
);
4389 icode
= (int) optab_handler (optab
, vec_mode
);
4390 if (icode
== CODE_FOR_nothing
)
4397 /* Function vectorizable_shift.
4399 Check if STMT performs a shift operation that can be vectorized.
4400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4405 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4406 gimple
**vec_stmt
, slp_tree slp_node
)
4410 tree op0
, op1
= NULL
;
4411 tree vec_oprnd1
= NULL_TREE
;
4412 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4414 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4415 enum tree_code code
;
4416 machine_mode vec_mode
;
4420 machine_mode optab_op2_mode
;
4422 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4423 gimple
*new_stmt
= NULL
;
4424 stmt_vec_info prev_stmt_info
;
4431 vec
<tree
> vec_oprnds0
= vNULL
;
4432 vec
<tree
> vec_oprnds1
= vNULL
;
4435 bool scalar_shift_arg
= true;
4436 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4437 vec_info
*vinfo
= stmt_info
->vinfo
;
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4443 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4447 /* Is STMT a vectorizable binary/unary operation? */
4448 if (!is_gimple_assign (stmt
))
4451 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4454 code
= gimple_assign_rhs_code (stmt
);
4456 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4457 || code
== RROTATE_EXPR
))
4460 scalar_dest
= gimple_assign_lhs (stmt
);
4461 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4462 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4467 "bit-precision shifts not supported.\n");
4471 op0
= gimple_assign_rhs1 (stmt
);
4472 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4474 if (dump_enabled_p ())
4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4476 "use not simple.\n");
4479 /* If op0 is an external or constant def use a vector type with
4480 the same size as the output vector type. */
4482 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4484 gcc_assert (vectype
);
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4489 "no vectype for scalar type\n");
4493 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4494 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4495 if (nunits_out
!= nunits_in
)
4498 op1
= gimple_assign_rhs2 (stmt
);
4499 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4503 "use not simple.\n");
4508 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4512 /* Multiple types in SLP are handled by creating the appropriate number of
4513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4518 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4520 gcc_assert (ncopies
>= 1);
4522 /* Determine whether the shift amount is a vector, or scalar. If the
4523 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4525 if ((dt
[1] == vect_internal_def
4526 || dt
[1] == vect_induction_def
)
4528 scalar_shift_arg
= false;
4529 else if (dt
[1] == vect_constant_def
4530 || dt
[1] == vect_external_def
4531 || dt
[1] == vect_internal_def
)
4533 /* In SLP, need to check whether the shift count is the same,
4534 in loops if it is a constant or invariant, it is always
4538 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4541 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4542 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4543 scalar_shift_arg
= false;
4546 /* If the shift amount is computed by a pattern stmt we cannot
4547 use the scalar amount directly thus give up and use a vector
4549 if (dt
[1] == vect_internal_def
)
4551 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4552 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4553 scalar_shift_arg
= false;
4558 if (dump_enabled_p ())
4559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4560 "operand mode requires invariant argument.\n");
4564 /* Vector shifted by vector. */
4565 if (!scalar_shift_arg
)
4567 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_NOTE
, vect_location
,
4570 "vector/vector shift/rotate found.\n");
4573 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4574 if (op1_vectype
== NULL_TREE
4575 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4579 "unusable type for last operand in"
4580 " vector/vector shift/rotate.\n");
4584 /* See if the machine has a vector shifted by scalar insn and if not
4585 then see if it has a vector shifted by vector insn. */
4588 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4590 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE
, vect_location
,
4594 "vector/scalar shift/rotate found.\n");
4598 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4600 && (optab_handler (optab
, TYPE_MODE (vectype
))
4601 != CODE_FOR_nothing
))
4603 scalar_shift_arg
= false;
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_NOTE
, vect_location
,
4607 "vector/vector shift/rotate found.\n");
4609 /* Unlike the other binary operators, shifts/rotates have
4610 the rhs being int, instead of the same type as the lhs,
4611 so make sure the scalar is the right type if we are
4612 dealing with vectors of long long/long/short/char. */
4613 if (dt
[1] == vect_constant_def
)
4614 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4615 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4619 && TYPE_MODE (TREE_TYPE (vectype
))
4620 != TYPE_MODE (TREE_TYPE (op1
)))
4622 if (dump_enabled_p ())
4623 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4624 "unusable type for last operand in"
4625 " vector/vector shift/rotate.\n");
4628 if (vec_stmt
&& !slp_node
)
4630 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4631 op1
= vect_init_vector (stmt
, op1
,
4632 TREE_TYPE (vectype
), NULL
);
4639 /* Supportable by target? */
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4647 vec_mode
= TYPE_MODE (vectype
);
4648 icode
= (int) optab_handler (optab
, vec_mode
);
4649 if (icode
== CODE_FOR_nothing
)
4651 if (dump_enabled_p ())
4652 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4653 "op not supported by target.\n");
4654 /* Check only during analysis. */
4655 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4656 || (vf
< vect_min_worthwhile_factor (code
)
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE
, vect_location
,
4661 "proceeding using word mode.\n");
4664 /* Worthwhile without SIMD support? Check only during analysis. */
4665 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4666 && vf
< vect_min_worthwhile_factor (code
)
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4671 "not worthwhile without SIMD support.\n");
4675 if (!vec_stmt
) /* transformation not required. */
4677 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE
, vect_location
,
4680 "=== vectorizable_shift ===\n");
4681 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_NOTE
, vect_location
,
4689 "transform binary/unary operation.\n");
4692 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4694 prev_stmt_info
= NULL
;
4695 for (j
= 0; j
< ncopies
; j
++)
4700 if (scalar_shift_arg
)
4702 /* Vector shl and shr insn patterns can be defined with scalar
4703 operand 2 (shift operand). In this case, use constant or loop
4704 invariant op1 directly, without extending it to vector mode
4706 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4707 if (!VECTOR_MODE_P (optab_op2_mode
))
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_NOTE
, vect_location
,
4711 "operand 1 using scalar mode.\n");
4713 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4714 vec_oprnds1
.quick_push (vec_oprnd1
);
4717 /* Store vec_oprnd1 for every vector stmt to be created
4718 for SLP_NODE. We check during the analysis that all
4719 the shift arguments are the same.
4720 TODO: Allow different constants for different vector
4721 stmts generated for an SLP instance. */
4722 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4723 vec_oprnds1
.quick_push (vec_oprnd1
);
4728 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4729 (a special case for certain kind of vector shifts); otherwise,
4730 operand 1 should be of a vector type (the usual case). */
4732 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4735 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4739 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4741 /* Arguments are ready. Create the new vector stmt. */
4742 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4744 vop1
= vec_oprnds1
[i
];
4745 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4746 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4747 gimple_assign_set_lhs (new_stmt
, new_temp
);
4748 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4750 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4757 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4760 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4763 vec_oprnds0
.release ();
4764 vec_oprnds1
.release ();
/* NOTE(review): orientation for the listing of vectorizable_operation
   that follows.
   The embedded source numbers skip (for example 4776 -> 4779 and
   4780 -> 4784), so the return type, the braces, several local
   declarations and the statements that end each rejection path are
   not visible here; confirm against the complete file before editing
   any code.

   What the visible lines show, in order:
   - the rhs code is read with gimple_assign_rhs_code and its operand
     count (TREE_CODE_LENGTH) is compared against unary_op, binary_op
     and ternary_op;
   - each operand is checked with vect_is_simple_use, filling dt[0],
     dt[1] and dt[2];
   - target support comes from can_mult_highpart_p for
     MULT_HIGHPART_EXPR and, apparently otherwise, from optab_handler
     on TYPE_MODE (vectype);
   - when VEC_STMT is null, STMT_VINFO_TYPE is set to op_vec_info_type
     and vect_model_simple_cost is called (the statement that ends
     this path is not visible);
   - the transform part loops over NCOPIES, builds one
     gimple_build_assign per element of vec_oprnds0 and records the
     result through STMT_VINFO_VEC_STMT / STMT_VINFO_RELATED_STMT.  */
4770 /* Function vectorizable_operation.
4772 Check if STMT performs a binary, unary or ternary operation that can
4774 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4775 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4776 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4779 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4780 gimple
**vec_stmt
, slp_tree slp_node
)
4784 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4785 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4787 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4788 enum tree_code code
;
4789 machine_mode vec_mode
;
4793 bool target_support_p
;
4795 enum vect_def_type dt
[3]
4796 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4797 gimple
*new_stmt
= NULL
;
4798 stmt_vec_info prev_stmt_info
;
4804 vec
<tree
> vec_oprnds0
= vNULL
;
4805 vec
<tree
> vec_oprnds1
= vNULL
;
4806 vec
<tree
> vec_oprnds2
= vNULL
;
4807 tree vop0
, vop1
, vop2
;
4808 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4809 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Statements that are not marked relevant are only considered when
   BB_VINFO is set; the action taken by this test is not visible in
   this listing.  */
4812 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4815 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4819 /* Is STMT a vectorizable binary/unary operation? */
4820 if (!is_gimple_assign (stmt
))
4823 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4826 code
= gimple_assign_rhs_code (stmt
);
4828 /* For pointer addition, we should use the normal plus for
4829 the vector addition. */
4830 if (code
== POINTER_PLUS_EXPR
)
4833 /* Support only unary, binary or ternary operations. */
4834 op_type
= TREE_CODE_LENGTH (code
);
4835 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4837 if (dump_enabled_p ())
4838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4839 "num. args = %d (not unary/binary/ternary op).\n",
4844 scalar_dest
= gimple_assign_lhs (stmt
);
4845 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4847 /* Most operations cannot handle bit-precision types without extra
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4850 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4851 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4852 /* Exception are bitwise binary operations. */
4853 && code
!= BIT_IOR_EXPR
4854 && code
!= BIT_XOR_EXPR
4855 && code
!= BIT_AND_EXPR
)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4859 "bit-precision arithmetic not supported.\n");
4863 op0
= gimple_assign_rhs1 (stmt
);
4864 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4866 if (dump_enabled_p ())
4867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4868 "use not simple.\n");
4871 /* If op0 is an external or constant def use a vector type with
4872 the same size as the output vector type. */
4875 /* For boolean type we cannot determine vectype by
4876 invariant value (don't know whether it is a vector
4877 of booleans or vector of integers). We use output
4878 vectype because operations on boolean don't change
4880 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
4882 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4886 "not supported operation on bool value.\n");
4889 vectype
= vectype_out
;
4892 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4895 gcc_assert (vectype
);
4898 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4901 "no vectype for scalar type ");
4902 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4904 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4910 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4911 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4912 if (nunits_out
!= nunits_in
)
4915 if (op_type
== binary_op
|| op_type
== ternary_op
)
4917 op1
= gimple_assign_rhs2 (stmt
);
4918 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4922 "use not simple.\n");
4926 if (op_type
== ternary_op
)
4928 op2
= gimple_assign_rhs3 (stmt
);
4929 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
4931 if (dump_enabled_p ())
4932 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4933 "use not simple.\n");
4939 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4943 /* Multiple types in SLP are handled by creating the appropriate number of
4944 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4949 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4951 gcc_assert (ncopies
>= 1);
4953 /* Shifts are handled in vectorizable_shift (). */
4954 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4955 || code
== RROTATE_EXPR
)
4958 /* Supportable by target? */
4960 vec_mode
= TYPE_MODE (vectype
);
4961 if (code
== MULT_HIGHPART_EXPR
)
4962 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
4965 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4968 if (dump_enabled_p ())
4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4973 target_support_p
= (optab_handler (optab
, vec_mode
)
4974 != CODE_FOR_nothing
);
4977 if (!target_support_p
)
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4981 "op not supported by target.\n");
4982 /* Check only during analysis. */
4983 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4984 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_NOTE
, vect_location
,
4988 "proceeding using word mode.\n");
4991 /* Worthwhile without SIMD support? Check only during analysis. */
4992 if (!VECTOR_MODE_P (vec_mode
)
4994 && vf
< vect_min_worthwhile_factor (code
))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4998 "not worthwhile without SIMD support.\n");
5002 if (!vec_stmt
) /* transformation not required. */
5004 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5005 if (dump_enabled_p ())
5006 dump_printf_loc (MSG_NOTE
, vect_location
,
5007 "=== vectorizable_operation ===\n");
5008 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5014 if (dump_enabled_p ())
5015 dump_printf_loc (MSG_NOTE
, vect_location
,
5016 "transform binary/unary operation.\n");
5019 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5021 /* In case the vectorization factor (VF) is bigger than the number
5022 of elements that we can fit in a vectype (nunits), we have to generate
5023 more than one vector stmt - i.e - we need to "unroll" the
5024 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5025 from one copy of the vector stmt to the next, in the field
5026 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5027 stages to find the correct vector defs to be used when vectorizing
5028 stmts that use the defs of the current stmt. The example below
5029 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5030 we need to create 4 vectorized stmts):
5032 before vectorization:
5033 RELATED_STMT VEC_STMT
5037 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5039 RELATED_STMT VEC_STMT
5040 VS1_0: vx0 = memref0 VS1_1 -
5041 VS1_1: vx1 = memref1 VS1_2 -
5042 VS1_2: vx2 = memref2 VS1_3 -
5043 VS1_3: vx3 = memref3 - -
5044 S1: x = load - VS1_0
5047 step2: vectorize stmt S2 (done here):
5048 To vectorize stmt S2 we first need to find the relevant vector
5049 def for the first operand 'x'. This is, as usual, obtained from
5050 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5051 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5052 relevant vector def 'vx0'. Having found 'vx0' we can generate
5053 the vector stmt VS2_0, and as usual, record it in the
5054 STMT_VINFO_VEC_STMT of stmt S2.
5055 When creating the second copy (VS2_1), we obtain the relevant vector
5056 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5057 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5058 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5059 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5060 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5061 chain of stmts and pointers:
5062 RELATED_STMT VEC_STMT
5063 VS1_0: vx0 = memref0 VS1_1 -
5064 VS1_1: vx1 = memref1 VS1_2 -
5065 VS1_2: vx2 = memref2 VS1_3 -
5066 VS1_3: vx3 = memref3 - -
5067 S1: x = load - VS1_0
5068 VS2_0: vz0 = vx0 + v1 VS2_1 -
5069 VS2_1: vz1 = vx1 + v1 VS2_2 -
5070 VS2_2: vz2 = vx2 + v1 VS2_3 -
5071 VS2_3: vz3 = vx3 + v1 - -
5072 S2: z = x + 1 - VS2_0 */
5074 prev_stmt_info
= NULL
;
5075 for (j
= 0; j
< ncopies
; j
++)
5080 if (op_type
== binary_op
|| op_type
== ternary_op
)
5081 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5084 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5086 if (op_type
== ternary_op
)
5088 vec_oprnds2
.create (1);
5089 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
5095 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5096 if (op_type
== ternary_op
)
5098 tree vec_oprnd
= vec_oprnds2
.pop ();
5099 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5104 /* Arguments are ready. Create the new vector stmt. */
5105 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5107 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5108 ? vec_oprnds1
[i
] : NULL_TREE
);
5109 vop2
= ((op_type
== ternary_op
)
5110 ? vec_oprnds2
[i
] : NULL_TREE
);
5111 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5112 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5113 gimple_assign_set_lhs (new_stmt
, new_temp
);
5114 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5116 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5123 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5125 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5126 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the three operand vectors declared at the top of the
   function.  */
5129 vec_oprnds0
.release ();
5130 vec_oprnds1
.release ();
5131 vec_oprnds2
.release ();
/* ensure_base_align (STMT_INFO, DR): when DR_VECT_AUX (DR)->base_misaligned
   is set, bring the alignment of DR_VECT_AUX (DR)->base_decl up to
   TYPE_ALIGN of STMT_VINFO_VECTYPE (STMT_INFO), then clear the
   base_misaligned flag.
   Decls for which decl_in_symtab_p holds go through
   symtab_node::increase_alignment.  The SET_DECL_ALIGN and
   DECL_USER_ALIGN lines presumably form the other branch, but the
   else keyword is not visible in this listing -- TODO confirm.
   NOTE(review): source numbers 5137-5139 and 5141-5144 are absent, so
   the end of the original comment, the return type and whatever runs
   before the base_misaligned test cannot be seen here.  */
5136 /* A helper function to ensure data reference DR's base alignment
5140 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5145 if (DR_VECT_AUX (dr
)->base_misaligned
)
5147 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5148 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5150 if (decl_in_symtab_p (base_decl
))
5151 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5154 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5155 DECL_USER_ALIGN (base_decl
) = 1;
5157 DR_VECT_AUX (dr
)->base_misaligned
= false;
/* perm_mask_for_reverse (VECTYPE): fills SEL so that SEL[i] is
   NUNITS - 1 - i, with NUNITS = TYPE_VECTOR_SUBPARTS (VECTYPE), asks
   can_vec_perm_p whether TYPE_MODE (VECTYPE) supports that selector,
   and returns vect_gen_perm_mask_checked (VECTYPE, SEL).
   SEL is obtained from XALLOCAVEC.
   NOTE(review): the statement run when can_vec_perm_p fails is not
   visible (source number 5179 is absent), and neither is the tail of
   the original comment (5164); presumably a null tree is returned
   there -- TODO confirm.  */
5162 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5163 reversal of the vector elements. If that is impossible to do,
5167 perm_mask_for_reverse (tree vectype
)
5172 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5173 sel
= XALLOCAVEC (unsigned char, nunits
);
5175 for (i
= 0; i
< nunits
; ++i
)
5176 sel
[i
] = nunits
- 1 - i
;
5178 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5180 return vect_gen_perm_mask_checked (vectype
, sel
);
5183 /* Function vectorizable_store.
5185 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5188 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5192 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5198 tree vec_oprnd
= NULL_TREE
;
5199 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5200 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5202 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5203 struct loop
*loop
= NULL
;
5204 machine_mode vec_mode
;
5206 enum dr_alignment_support alignment_support_scheme
;
5208 enum vect_def_type dt
;
5209 stmt_vec_info prev_stmt_info
= NULL
;
5210 tree dataref_ptr
= NULL_TREE
;
5211 tree dataref_offset
= NULL_TREE
;
5212 gimple
*ptr_incr
= NULL
;
5215 gimple
*next_stmt
, *first_stmt
= NULL
;
5216 bool grouped_store
= false;
5217 bool store_lanes_p
= false;
5218 unsigned int group_size
, i
;
5219 vec
<tree
> dr_chain
= vNULL
;
5220 vec
<tree
> oprnds
= vNULL
;
5221 vec
<tree
> result_chain
= vNULL
;
5223 bool negative
= false;
5224 tree offset
= NULL_TREE
;
5225 vec
<tree
> vec_oprnds
= vNULL
;
5226 bool slp
= (slp_node
!= NULL
);
5227 unsigned int vec_num
;
5228 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5229 vec_info
*vinfo
= stmt_info
->vinfo
;
5231 tree scatter_base
= NULL_TREE
, scatter_off
= NULL_TREE
;
5232 tree scatter_off_vectype
= NULL_TREE
, scatter_decl
= NULL_TREE
;
5233 int scatter_scale
= 1;
5234 enum vect_def_type scatter_idx_dt
= vect_unknown_def_type
;
5235 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5239 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5242 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5246 /* Is vectorizable store? */
5248 if (!is_gimple_assign (stmt
))
5251 scalar_dest
= gimple_assign_lhs (stmt
);
5252 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5253 && is_pattern_stmt_p (stmt_info
))
5254 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5255 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5256 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5257 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5258 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5259 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5260 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5261 && TREE_CODE (scalar_dest
) != MEM_REF
)
5264 /* Cannot have hybrid store SLP -- that would mean storing to the
5265 same location twice. */
5266 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5268 gcc_assert (gimple_assign_single_p (stmt
));
5270 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5271 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5275 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5276 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5281 /* Multiple types in SLP are handled by creating the appropriate number of
5282 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5287 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5289 gcc_assert (ncopies
>= 1);
5291 /* FORNOW. This restriction should be relaxed. */
5292 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5294 if (dump_enabled_p ())
5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5296 "multiple types in nested loop.\n");
5300 op
= gimple_assign_rhs1 (stmt
);
5302 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5306 "use not simple.\n");
5310 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5313 elem_type
= TREE_TYPE (vectype
);
5314 vec_mode
= TYPE_MODE (vectype
);
5316 /* FORNOW. In some cases can vectorize even if data-type not supported
5317 (e.g. - array initialization with 0). */
5318 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5321 if (!STMT_VINFO_DATA_REF (stmt_info
))
5324 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5327 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5328 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5329 size_zero_node
) < 0;
5330 if (negative
&& ncopies
> 1)
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5334 "multiple types with negative step.\n");
5339 gcc_assert (!grouped_store
);
5340 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5341 if (alignment_support_scheme
!= dr_aligned
5342 && alignment_support_scheme
!= dr_unaligned_supported
)
5344 if (dump_enabled_p ())
5345 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5346 "negative step but alignment required.\n");
5349 if (dt
!= vect_constant_def
5350 && dt
!= vect_external_def
5351 && !perm_mask_for_reverse (vectype
))
5353 if (dump_enabled_p ())
5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5355 "negative step and reversing not supported.\n");
5361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5363 grouped_store
= true;
5364 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5365 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5366 if (!slp
&& !STMT_VINFO_STRIDED_P (stmt_info
))
5368 if (vect_store_lanes_supported (vectype
, group_size
))
5369 store_lanes_p
= true;
5370 else if (!vect_grouped_store_supported (vectype
, group_size
))
5374 if (first_stmt
== stmt
)
5376 /* STMT is the leader of the group. Check the operands of all the
5377 stmts of the group. */
5378 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5381 gcc_assert (gimple_assign_single_p (next_stmt
));
5382 op
= gimple_assign_rhs1 (next_stmt
);
5383 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5387 "use not simple.\n");
5390 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5395 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5398 scatter_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &scatter_base
,
5399 &scatter_off
, &scatter_scale
);
5400 gcc_assert (scatter_decl
);
5401 if (!vect_is_simple_use (scatter_off
, vinfo
, &def_stmt
, &scatter_idx_dt
,
5402 &scatter_off_vectype
))
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5406 "scatter index use not simple.");
5411 if (!vec_stmt
) /* transformation not required. */
5413 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5414 /* The SLP costs are calculated during SLP analysis. */
5415 if (!PURE_SLP_STMT (stmt_info
))
5416 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5423 ensure_base_align (stmt_info
, dr
);
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5427 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5428 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (scatter_decl
));
5429 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5430 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5431 edge pe
= loop_preheader_edge (loop
);
5434 enum { NARROW
, NONE
, WIDEN
} modifier
;
5435 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (scatter_off_vectype
);
5437 if (nunits
== (unsigned int) scatter_off_nunits
)
5439 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5441 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5444 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5445 sel
[i
] = i
| nunits
;
5447 perm_mask
= vect_gen_perm_mask_checked (scatter_off_vectype
, sel
);
5448 gcc_assert (perm_mask
!= NULL_TREE
);
5450 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5452 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5455 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5456 sel
[i
] = i
| scatter_off_nunits
;
5458 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5459 gcc_assert (perm_mask
!= NULL_TREE
);
5465 rettype
= TREE_TYPE (TREE_TYPE (scatter_decl
));
5466 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5467 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5468 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5469 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5470 scaletype
= TREE_VALUE (arglist
);
5472 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5473 && TREE_CODE (rettype
) == VOID_TYPE
);
5475 ptr
= fold_convert (ptrtype
, scatter_base
);
5476 if (!is_gimple_min_invariant (ptr
))
5478 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5479 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5480 gcc_assert (!new_bb
);
5483 /* Currently we support only unconditional scatter stores,
5484 so mask should be all ones. */
5485 mask
= build_int_cst (masktype
, -1);
5486 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5488 scale
= build_int_cst (scaletype
, scatter_scale
);
5490 prev_stmt_info
= NULL
;
5491 for (j
= 0; j
< ncopies
; ++j
)
5496 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5498 = vect_get_vec_def_for_operand (scatter_off
, stmt
);
5500 else if (modifier
!= NONE
&& (j
& 1))
5502 if (modifier
== WIDEN
)
5505 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5506 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5509 else if (modifier
== NARROW
)
5511 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5514 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5522 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5524 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5527 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5529 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5530 == TYPE_VECTOR_SUBPARTS (srctype
));
5531 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5532 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5533 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5534 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5538 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5540 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5541 == TYPE_VECTOR_SUBPARTS (idxtype
));
5542 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5543 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5544 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5545 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5550 = gimple_build_call (scatter_decl
, 5, ptr
, mask
, op
, src
, scale
);
5552 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5554 if (prev_stmt_info
== NULL
)
5555 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5557 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5558 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5565 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5566 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5568 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5571 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5573 /* We vectorize all the stmts of the interleaving group when we
5574 reach the last stmt in the group. */
5575 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5576 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5585 grouped_store
= false;
5586 /* VEC_NUM is the number of vect stmts to be created for this
5588 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5589 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5590 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5591 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5592 op
= gimple_assign_rhs1 (first_stmt
);
5595 /* VEC_NUM is the number of vect stmts to be created for this
5597 vec_num
= group_size
;
5603 group_size
= vec_num
= 1;
5606 if (dump_enabled_p ())
5607 dump_printf_loc (MSG_NOTE
, vect_location
,
5608 "transform store. ncopies = %d\n", ncopies
);
5610 if (STMT_VINFO_STRIDED_P (stmt_info
))
5612 gimple_stmt_iterator incr_gsi
;
5618 gimple_seq stmts
= NULL
;
5619 tree stride_base
, stride_step
, alias_off
;
5623 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5626 = fold_build_pointer_plus
5627 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5628 size_binop (PLUS_EXPR
,
5629 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5630 convert_to_ptrofftype (DR_INIT(first_dr
))));
5631 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5633 /* For a store with loop-invariant (but other than power-of-2)
5634 stride (i.e. not a grouped access) like so:
5636 for (i = 0; i < n; i += stride)
5639 we generate a new induction variable and new stores from
5640 the components of the (vectorized) rhs:
5642 for (j = 0; ; j += VF*stride)
5647 array[j + stride] = tmp2;
5651 unsigned nstores
= nunits
;
5653 tree ltype
= elem_type
;
5656 if (group_size
< nunits
5657 && nunits
% group_size
== 0)
5659 nstores
= nunits
/ group_size
;
5661 ltype
= build_vector_type (elem_type
, group_size
);
5663 else if (group_size
>= nunits
5664 && group_size
% nunits
== 0)
5670 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5671 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5674 ivstep
= stride_step
;
5675 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5676 build_int_cst (TREE_TYPE (ivstep
), vf
));
5678 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5680 create_iv (stride_base
, ivstep
, NULL
,
5681 loop
, &incr_gsi
, insert_after
,
5683 incr
= gsi_stmt (incr_gsi
);
5684 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5686 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5688 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5690 prev_stmt_info
= NULL
;
5691 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5692 next_stmt
= first_stmt
;
5693 for (g
= 0; g
< group_size
; g
++)
5695 running_off
= offvar
;
5698 tree size
= TYPE_SIZE_UNIT (ltype
);
5699 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5701 tree newoff
= copy_ssa_name (running_off
, NULL
);
5702 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5704 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5705 running_off
= newoff
;
5707 unsigned int group_el
= 0;
5708 unsigned HOST_WIDE_INT
5709 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
5710 for (j
= 0; j
< ncopies
; j
++)
5712 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5713 and first_stmt == stmt. */
5718 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5720 vec_oprnd
= vec_oprnds
[0];
5724 gcc_assert (gimple_assign_single_p (next_stmt
));
5725 op
= gimple_assign_rhs1 (next_stmt
);
5726 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5732 vec_oprnd
= vec_oprnds
[j
];
5735 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5736 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5740 for (i
= 0; i
< nstores
; i
++)
5742 tree newref
, newoff
;
5743 gimple
*incr
, *assign
;
5744 tree size
= TYPE_SIZE (ltype
);
5745 /* Extract the i'th component. */
5746 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5747 bitsize_int (i
), size
);
5748 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5751 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5755 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
5757 newref
= build2 (MEM_REF
, ltype
,
5758 running_off
, this_off
);
5760 /* And store it to *running_off. */
5761 assign
= gimple_build_assign (newref
, elem
);
5762 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5766 || group_el
== group_size
)
5768 newoff
= copy_ssa_name (running_off
, NULL
);
5769 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5770 running_off
, stride_step
);
5771 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5773 running_off
= newoff
;
5776 if (g
== group_size
- 1
5779 if (j
== 0 && i
== 0)
5780 STMT_VINFO_VEC_STMT (stmt_info
)
5781 = *vec_stmt
= assign
;
5783 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5784 prev_stmt_info
= vinfo_for_stmt (assign
);
5788 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5795 dr_chain
.create (group_size
);
5796 oprnds
.create (group_size
);
5798 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5799 gcc_assert (alignment_support_scheme
);
5800 /* Targets with store-lane instructions must not require explicit
5802 gcc_assert (!store_lanes_p
5803 || alignment_support_scheme
== dr_aligned
5804 || alignment_support_scheme
== dr_unaligned_supported
);
5807 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5810 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5812 aggr_type
= vectype
;
5814 /* In case the vectorization factor (VF) is bigger than the number
5815 of elements that we can fit in a vectype (nunits), we have to generate
5816 more than one vector stmt - i.e - we need to "unroll" the
5817 vector stmt by a factor VF/nunits. For more details see documentation in
5818 vect_get_vec_def_for_stmt_copy. */
5820 /* In case of interleaving (non-unit grouped access):
5827 We create vectorized stores starting from base address (the access of the
5828 first stmt in the chain (S2 in the above example), when the last store stmt
5829 of the chain (S4) is reached:
5832 VS2: &base + vec_size*1 = vx0
5833 VS3: &base + vec_size*2 = vx1
5834 VS4: &base + vec_size*3 = vx3
5836 Then permutation statements are generated:
5838 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5839 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5842 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5843 (the order of the data-refs in the output of vect_permute_store_chain
5844 corresponds to the order of scalar stmts in the interleaving chain - see
5845 the documentation of vect_permute_store_chain()).
5847 In case of both multiple types and interleaving, above vector stores and
5848 permutation stmts are created for every copy. The result vector stmts are
5849 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5850 STMT_VINFO_RELATED_STMT for the next copies.
5853 prev_stmt_info
= NULL
;
5854 for (j
= 0; j
< ncopies
; j
++)
5861 /* Get vectorized arguments for SLP_NODE. */
5862 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5863 NULL
, slp_node
, -1);
5865 vec_oprnd
= vec_oprnds
[0];
5869 /* For interleaved stores we collect vectorized defs for all the
5870 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5871 used as an input to vect_permute_store_chain(), and OPRNDS as
5872 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5874 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5875 OPRNDS are of size 1. */
5876 next_stmt
= first_stmt
;
5877 for (i
= 0; i
< group_size
; i
++)
5879 /* Since gaps are not supported for interleaved stores,
5880 GROUP_SIZE is the exact number of stmts in the chain.
5881 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5882 there is no interleaving, GROUP_SIZE is 1, and only one
5883 iteration of the loop will be executed. */
5884 gcc_assert (next_stmt
5885 && gimple_assign_single_p (next_stmt
));
5886 op
= gimple_assign_rhs1 (next_stmt
);
5888 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5889 dr_chain
.quick_push (vec_oprnd
);
5890 oprnds
.quick_push (vec_oprnd
);
5891 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5895 /* We should have caught mismatched types earlier. */
5896 gcc_assert (useless_type_conversion_p (vectype
,
5897 TREE_TYPE (vec_oprnd
)));
5898 bool simd_lane_access_p
5899 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5900 if (simd_lane_access_p
5901 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5902 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5903 && integer_zerop (DR_OFFSET (first_dr
))
5904 && integer_zerop (DR_INIT (first_dr
))
5905 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5906 get_alias_set (DR_REF (first_dr
))))
5908 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5909 dataref_offset
= build_int_cst (reference_alias_ptr_type
5910 (DR_REF (first_dr
)), 0);
5915 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5916 simd_lane_access_p
? loop
: NULL
,
5917 offset
, &dummy
, gsi
, &ptr_incr
,
5918 simd_lane_access_p
, &inv_p
);
5919 gcc_assert (bb_vinfo
|| !inv_p
);
5923 /* For interleaved stores we created vectorized defs for all the
5924 defs stored in OPRNDS in the previous iteration (previous copy).
5925 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5926 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5928 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5929 OPRNDS are of size 1. */
5930 for (i
= 0; i
< group_size
; i
++)
5933 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
5934 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5935 dr_chain
[i
] = vec_oprnd
;
5936 oprnds
[i
] = vec_oprnd
;
5940 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5941 TYPE_SIZE_UNIT (aggr_type
));
5943 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5944 TYPE_SIZE_UNIT (aggr_type
));
5951 /* Combine all the vectors into an array. */
5952 vec_array
= create_vector_array (vectype
, vec_num
);
5953 for (i
= 0; i
< vec_num
; i
++)
5955 vec_oprnd
= dr_chain
[i
];
5956 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5960 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5961 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5962 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5963 gimple_call_set_lhs (new_stmt
, data_ref
);
5964 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5972 result_chain
.create (group_size
);
5974 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5978 next_stmt
= first_stmt
;
5979 for (i
= 0; i
< vec_num
; i
++)
5981 unsigned align
, misalign
;
5984 /* Bump the vector pointer. */
5985 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5989 vec_oprnd
= vec_oprnds
[i
];
5990 else if (grouped_store
)
5991 /* For grouped stores vectorized defs are interleaved in
5992 vect_permute_store_chain(). */
5993 vec_oprnd
= result_chain
[i
];
5995 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
5999 : build_int_cst (reference_alias_ptr_type
6000 (DR_REF (first_dr
)), 0));
6001 align
= TYPE_ALIGN_UNIT (vectype
);
6002 if (aligned_access_p (first_dr
))
6004 else if (DR_MISALIGNMENT (first_dr
) == -1)
6006 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6007 align
= TYPE_ALIGN_UNIT (elem_type
);
6009 align
= get_object_alignment (DR_REF (first_dr
))
6012 TREE_TYPE (data_ref
)
6013 = build_aligned_type (TREE_TYPE (data_ref
),
6014 align
* BITS_PER_UNIT
);
6018 TREE_TYPE (data_ref
)
6019 = build_aligned_type (TREE_TYPE (data_ref
),
6020 TYPE_ALIGN (elem_type
));
6021 misalign
= DR_MISALIGNMENT (first_dr
);
6023 if (dataref_offset
== NULL_TREE
6024 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6025 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6029 && dt
!= vect_constant_def
6030 && dt
!= vect_external_def
)
6032 tree perm_mask
= perm_mask_for_reverse (vectype
);
6034 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6036 tree new_temp
= make_ssa_name (perm_dest
);
6038 /* Generate the permute statement. */
6040 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6041 vec_oprnd
, perm_mask
);
6042 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6044 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6045 vec_oprnd
= new_temp
;
6048 /* Arguments are ready. Create the new vector stmt. */
6049 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6050 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6055 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6063 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6065 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6066 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6070 dr_chain
.release ();
6072 result_chain
.release ();
6073 vec_oprnds
.release ();
6078 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6079 VECTOR_CST mask. No checks are made that the target platform supports the
6080 mask, so callers may wish to test can_vec_perm_p separately, or use
6081 vect_gen_perm_mask_checked. */
/* SEL is read at indices 0 .. TYPE_VECTOR_SUBPARTS (VECTYPE) - 1, so it
   must provide at least that many selector entries.
   NOTE(review): the declarations of I and NUNITS and the final return are
   not visible in this excerpt; MASK_VEC is presumably what is returned --
   confirm against the full file.  */
6084 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6086 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
/* Number of elements in VECTYPE; the mask gets one selector per element.  */
6089 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Element type of the mask: the type the front end associates with the
   integer mode matching the mode of VECTYPE's element type.  The trailing
   1 is presumably the hook's "unsigned" flag -- confirm against the
   type_for_mode langhook.  */
6091 mask_elt_type
= lang_hooks
.types
.type_for_mode
6092 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
/* Vector type built from that integer element type.  */
6093 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Scratch array of NUNITS trees holding the individual mask elements.  */
6095 mask_elts
= XALLOCAVEC (tree
, nunits
);
/* Convert each selector byte into an integer constant of the mask element
   type; the array is filled from the last element down to the first.  */
6096 for (i
= nunits
- 1; i
>= 0; i
--)
6097 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
/* Assemble the constants into a vector of type MASK_TYPE.  */
6098 mask_vec
= build_vector (mask_type
, mask_elts
);
6103 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6104 i.e. that the target supports the pattern _for arbitrary input vectors_. */
/* VECTYPE and SEL are forwarded unchanged to vect_gen_perm_mask_any, whose
   result is returned.  The support check is a gcc_assert, so an unsupported
   permutation is treated as an internal error rather than reported to the
   caller.  */
6107 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
/* The query uses the machine mode of VECTYPE.  NOTE(review): the FALSE
   argument presumably tells can_vec_perm_p that the selector is a known
   constant -- confirm against its prototype.  */
6109 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6110 return vect_gen_perm_mask_any (vectype
, sel
);
6113 /* Given a vector variable X and Y, that was generated for the scalar
6114 STMT, generate instructions to permute the vector elements of X and Y
6115 using permutation mask MASK_VEC, insert them at *GSI and return the
6116 permuted vector variable. */
/* The result's vector type is taken from X alone; Y is presumably expected
   to have the same type -- nothing here checks it.
   NOTE(review): the declaration of PERM_STMT and the final return are not
   visible in this excerpt; per the comment above, DATA_REF (the new SSA
   name) is presumably the value returned.  */
6119 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6120 gimple_stmt_iterator
*gsi
)
6122 tree vectype
= TREE_TYPE (x
);
6123 tree perm_dest
, data_ref
;
/* Create a destination variable of type VECTYPE derived from the LHS of
   the scalar STMT, then a fresh SSA name for it.  Despite its name,
   DATA_REF holds that SSA name, not a memory reference.  */
6126 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6127 data_ref
= make_ssa_name (perm_dest
);
6129 /* Generate the permute statement. */
/* DATA_REF = VEC_PERM_EXPR <X, Y, MASK_VEC>, emitted at *GSI on behalf of
   STMT.  */
6130 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6131 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6136 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6137 inserting them on the loop's preheader edge. Returns true if we
6138 were successful in doing so (and thus STMT can be moved then),
6139 otherwise returns false. */
/* The work is done in two passes over the SSA use operands of STMT: the
   first pass only inspects the defining statements, the second pass moves
   them.
   NOTE(review): the local declarations and every return statement are
   missing from this excerpt, so the exact exit conditions cannot be read
   off here -- confirm against the full file.  */
6142 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
/* Pass 1: look at every use whose definition is a real statement located
   inside LOOP.  */
6148 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6150 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6151 if (!gimple_nop_p (def_stmt
)
6152 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6154 /* Make sure we don't need to recurse. While we could do
6155 so in simple cases when there are more complex use webs
6156 we don't have an easy way to preserve stmt order to fulfil
6157 dependencies within them. */
/* PHI definitions are singled out here; the action taken is not visible in
   this excerpt, presumably the function gives up and returns false.  */
6160 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Check the operands of the defining statement itself: one that is in turn
   defined inside LOOP would need a recursive hoist (presumably also a
   "return false" case).  */
6162 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6164 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6165 if (!gimple_nop_p (def_stmt2
)
6166 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Pass 2: move each in-loop defining statement onto the preheader edge of
   LOOP.  */
6176 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6178 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6179 if (!gimple_nop_p (def_stmt
)
6180 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* Unlink DEF_STMT from its current position and re-insert the same
   statement on the preheader edge.  NOTE(review): the FALSE argument to
   gsi_remove presumably means "do not remove permanently", which is what
   allows the re-insertion -- confirm against gsi_remove's prototype.  */
6182 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6183 gsi_remove (&gsi
, false);
6184 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6191 /* vectorizable_load.
6193 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6195 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6196 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6197 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6200 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6201 slp_tree slp_node
, slp_instance slp_node_instance
)
6204 tree vec_dest
= NULL
;
6205 tree data_ref
= NULL
;
6206 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6207 stmt_vec_info prev_stmt_info
;
6208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6209 struct loop
*loop
= NULL
;
6210 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6211 bool nested_in_vect_loop
= false;
6212 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6216 gimple
*new_stmt
= NULL
;
6218 enum dr_alignment_support alignment_support_scheme
;
6219 tree dataref_ptr
= NULL_TREE
;
6220 tree dataref_offset
= NULL_TREE
;
6221 gimple
*ptr_incr
= NULL
;
6223 int i
, j
, group_size
= -1, group_gap_adj
;
6224 tree msq
= NULL_TREE
, lsq
;
6225 tree offset
= NULL_TREE
;
6226 tree byte_offset
= NULL_TREE
;
6227 tree realignment_token
= NULL_TREE
;
6229 vec
<tree
> dr_chain
= vNULL
;
6230 bool grouped_load
= false;
6231 bool load_lanes_p
= false;
6233 gimple
*first_stmt_for_drptr
= NULL
;
6235 bool negative
= false;
6236 bool compute_in_loop
= false;
6237 struct loop
*at_loop
;
6239 bool slp
= (slp_node
!= NULL
);
6240 bool slp_perm
= false;
6241 enum tree_code code
;
6242 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6245 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
6246 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
6247 int gather_scale
= 1;
6248 enum vect_def_type gather_dt
= vect_unknown_def_type
;
6249 vec_info
*vinfo
= stmt_info
->vinfo
;
6251 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6254 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6258 /* Is vectorizable load? */
6259 if (!is_gimple_assign (stmt
))
6262 scalar_dest
= gimple_assign_lhs (stmt
);
6263 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6266 code
= gimple_assign_rhs_code (stmt
);
6267 if (code
!= ARRAY_REF
6268 && code
!= BIT_FIELD_REF
6269 && code
!= INDIRECT_REF
6270 && code
!= COMPONENT_REF
6271 && code
!= IMAGPART_EXPR
6272 && code
!= REALPART_EXPR
6274 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6277 if (!STMT_VINFO_DATA_REF (stmt_info
))
6280 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6281 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6285 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6286 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6287 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6292 /* Multiple types in SLP are handled by creating the appropriate number of
6293 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6298 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6300 gcc_assert (ncopies
>= 1);
6302 /* FORNOW. This restriction should be relaxed. */
6303 if (nested_in_vect_loop
&& ncopies
> 1)
6305 if (dump_enabled_p ())
6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6307 "multiple types in nested loop.\n");
6311 /* Invalidate assumptions made by dependence analysis when vectorization
6312 on the unrolled body effectively re-orders stmts. */
6314 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6315 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6316 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6318 if (dump_enabled_p ())
6319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6320 "cannot perform implicit CSE when unrolling "
6321 "with negative dependence distance\n");
6325 elem_type
= TREE_TYPE (vectype
);
6326 mode
= TYPE_MODE (vectype
);
6328 /* FORNOW. In some cases can vectorize even if data-type not supported
6329 (e.g. - data copies). */
6330 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6334 "Aligned load, but unsupported type.\n");
6338 /* Check if the load is a part of an interleaving chain. */
6339 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6341 grouped_load
= true;
6343 gcc_assert (!nested_in_vect_loop
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6345 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6346 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6348 if (!slp
&& !STMT_VINFO_STRIDED_P (stmt_info
))
6350 if (vect_load_lanes_supported (vectype
, group_size
))
6351 load_lanes_p
= true;
6352 else if (!vect_grouped_load_supported (vectype
, group_size
))
6356 /* If this is single-element interleaving with an element distance
6357 that leaves unused vector loads around punt - we at least create
6358 very sub-optimal code in that case (and blow up memory,
6360 if (first_stmt
== stmt
6361 && !GROUP_NEXT_ELEMENT (stmt_info
))
6363 if (GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
6365 if (dump_enabled_p ())
6366 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6367 "single-element interleaving not supported "
6368 "for not adjacent vector loads\n");
6372 /* Single-element interleaving requires peeling for gaps. */
6373 gcc_assert (GROUP_GAP (stmt_info
));
6376 /* If there is a gap at the end of the group or the group size cannot
6377 be made a multiple of the vector element count then we access excess
6378 elements in the last iteration and thus need to peel that off. */
6380 && ! STMT_VINFO_STRIDED_P (stmt_info
)
6381 && (GROUP_GAP (vinfo_for_stmt (first_stmt
)) != 0
6382 || (!slp
&& !load_lanes_p
&& vf
% group_size
!= 0)))
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6386 "Data access with gaps requires scalar "
6390 if (dump_enabled_p ())
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6392 "Peeling for outer loop is not supported\n");
6396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
6399 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6402 /* ??? The following is overly pessimistic (as well as the loop
6403 case above) in the case we can statically determine the excess
6404 elements loaded are within the bounds of a decl that is accessed.
6405 Likewise for BB vectorizations using masked loads is a possibility. */
6406 if (bb_vinfo
&& slp_perm
&& group_size
% nunits
!= 0)
6408 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6409 "BB vectorization with gaps at the end of a load "
6410 "is not supported\n");
6414 /* Invalidate assumptions made by dependence analysis when vectorization
6415 on the unrolled body effectively re-orders stmts. */
6416 if (!PURE_SLP_STMT (stmt_info
)
6417 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6418 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6419 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6423 "cannot perform implicit CSE when performing "
6424 "group loads with negative dependence distance\n");
6428 /* Similarly when the stmt is a load that is both part of a SLP
6429 instance and a loop vectorized stmt via the same-dr mechanism
6430 we have to give up. */
6431 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6432 && (STMT_SLP_TYPE (stmt_info
)
6433 != STMT_SLP_TYPE (vinfo_for_stmt
6434 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6436 if (dump_enabled_p ())
6437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6438 "conflicting SLP types for CSEd load\n");
6444 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6447 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
6448 &gather_off
, &gather_scale
);
6449 gcc_assert (gather_decl
);
6450 if (!vect_is_simple_use (gather_off
, vinfo
, &def_stmt
, &gather_dt
,
6451 &gather_off_vectype
))
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6455 "gather index use not simple.\n");
6459 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6463 negative
= tree_int_cst_compare (nested_in_vect_loop
6464 ? STMT_VINFO_DR_STEP (stmt_info
)
6466 size_zero_node
) < 0;
6467 if (negative
&& ncopies
> 1)
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6471 "multiple types with negative step.\n");
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6481 "negative step for group load not supported"
6485 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6486 if (alignment_support_scheme
!= dr_aligned
6487 && alignment_support_scheme
!= dr_unaligned_supported
)
6489 if (dump_enabled_p ())
6490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6491 "negative step but alignment required.\n");
6494 if (!perm_mask_for_reverse (vectype
))
6496 if (dump_enabled_p ())
6497 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6498 "negative step and reversing not supported."
6505 if (!vec_stmt
) /* transformation not required. */
6507 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6508 /* The SLP costs are calculated during SLP analysis. */
6509 if (!PURE_SLP_STMT (stmt_info
))
6510 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6515 if (dump_enabled_p ())
6516 dump_printf_loc (MSG_NOTE
, vect_location
,
6517 "transform load. ncopies = %d\n", ncopies
);
6521 ensure_base_align (stmt_info
, dr
);
6523 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6525 tree vec_oprnd0
= NULL_TREE
, op
;
6526 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6527 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6528 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6529 edge pe
= loop_preheader_edge (loop
);
6532 enum { NARROW
, NONE
, WIDEN
} modifier
;
6533 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6535 if (nunits
== gather_off_nunits
)
6537 else if (nunits
== gather_off_nunits
/ 2)
6539 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6542 for (i
= 0; i
< gather_off_nunits
; ++i
)
6543 sel
[i
] = i
| nunits
;
6545 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6547 else if (nunits
== gather_off_nunits
* 2)
6549 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6552 for (i
= 0; i
< nunits
; ++i
)
6553 sel
[i
] = i
< gather_off_nunits
6554 ? i
: i
+ nunits
- gather_off_nunits
;
6556 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6562 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6563 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6564 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6565 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6566 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6567 scaletype
= TREE_VALUE (arglist
);
6568 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6570 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6572 ptr
= fold_convert (ptrtype
, gather_base
);
6573 if (!is_gimple_min_invariant (ptr
))
6575 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6576 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6577 gcc_assert (!new_bb
);
6580 /* Currently we support only unconditional gather loads,
6581 so mask should be all ones. */
6582 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6583 mask
= build_int_cst (masktype
, -1);
6584 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6586 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6587 mask
= build_vector_from_val (masktype
, mask
);
6588 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6590 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6594 for (j
= 0; j
< 6; ++j
)
6596 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6597 mask
= build_real (TREE_TYPE (masktype
), r
);
6598 mask
= build_vector_from_val (masktype
, mask
);
6599 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6604 scale
= build_int_cst (scaletype
, gather_scale
);
6606 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6607 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6608 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6612 for (j
= 0; j
< 6; ++j
)
6614 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6615 merge
= build_real (TREE_TYPE (rettype
), r
);
6619 merge
= build_vector_from_val (rettype
, merge
);
6620 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6622 prev_stmt_info
= NULL
;
6623 for (j
= 0; j
< ncopies
; ++j
)
6625 if (modifier
== WIDEN
&& (j
& 1))
6626 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6627 perm_mask
, stmt
, gsi
);
6630 = vect_get_vec_def_for_operand (gather_off
, stmt
);
6633 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6635 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6637 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6638 == TYPE_VECTOR_SUBPARTS (idxtype
));
6639 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6640 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6642 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6643 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6648 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6650 if (!useless_type_conversion_p (vectype
, rettype
))
6652 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6653 == TYPE_VECTOR_SUBPARTS (rettype
));
6654 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6655 gimple_call_set_lhs (new_stmt
, op
);
6656 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6657 var
= make_ssa_name (vec_dest
);
6658 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6660 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6664 var
= make_ssa_name (vec_dest
, new_stmt
);
6665 gimple_call_set_lhs (new_stmt
, var
);
6668 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6670 if (modifier
== NARROW
)
6677 var
= permute_vec_elements (prev_res
, var
,
6678 perm_mask
, stmt
, gsi
);
6679 new_stmt
= SSA_NAME_DEF_STMT (var
);
6682 if (prev_stmt_info
== NULL
)
6683 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6685 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6686 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6690 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6692 gimple_stmt_iterator incr_gsi
;
6698 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6699 gimple_seq stmts
= NULL
;
6700 tree stride_base
, stride_step
, alias_off
;
6702 gcc_assert (!nested_in_vect_loop
);
6704 if (slp
&& grouped_load
)
6705 first_dr
= STMT_VINFO_DATA_REF
6706 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6711 = fold_build_pointer_plus
6712 (DR_BASE_ADDRESS (first_dr
),
6713 size_binop (PLUS_EXPR
,
6714 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6715 convert_to_ptrofftype (DR_INIT (first_dr
))));
6716 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6718 /* For a load with loop-invariant (but other than power-of-2)
6719 stride (i.e. not a grouped access) like so:
6721 for (i = 0; i < n; i += stride)
6724 we generate a new induction variable and new accesses to
6725 form a new vector (or vectors, depending on ncopies):
6727 for (j = 0; ; j += VF*stride)
6729 tmp2 = array[j + stride];
6731 vectemp = {tmp1, tmp2, ...}
6734 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6735 build_int_cst (TREE_TYPE (stride_step
), vf
));
6737 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6739 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6740 loop
, &incr_gsi
, insert_after
,
6742 incr
= gsi_stmt (incr_gsi
);
6743 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6745 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6746 &stmts
, true, NULL_TREE
);
6748 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6750 prev_stmt_info
= NULL
;
6751 running_off
= offvar
;
6752 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6753 int nloads
= nunits
;
6755 tree ltype
= TREE_TYPE (vectype
);
6756 auto_vec
<tree
> dr_chain
;
6759 if (group_size
< nunits
6760 && nunits
% group_size
== 0)
6762 nloads
= nunits
/ group_size
;
6764 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6765 ltype
= build_aligned_type (ltype
,
6766 TYPE_ALIGN (TREE_TYPE (vectype
)));
6768 else if (group_size
>= nunits
6769 && group_size
% nunits
== 0)
6774 ltype
= build_aligned_type (ltype
,
6775 TYPE_ALIGN (TREE_TYPE (vectype
)));
6777 /* For SLP permutation support we need to load the whole group,
6778 not only the number of vector stmts the permutation result
6782 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
6783 dr_chain
.create (ncopies
);
6786 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6789 unsigned HOST_WIDE_INT
6790 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6791 for (j
= 0; j
< ncopies
; j
++)
6794 vec_alloc (v
, nloads
);
6795 for (i
= 0; i
< nloads
; i
++)
6797 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6799 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6800 build2 (MEM_REF
, ltype
,
6801 running_off
, this_off
));
6802 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6804 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
6805 gimple_assign_lhs (new_stmt
));
6809 || group_el
== group_size
)
6811 tree newoff
= copy_ssa_name (running_off
);
6812 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6813 running_off
, stride_step
);
6814 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6816 running_off
= newoff
;
6822 tree vec_inv
= build_constructor (vectype
, v
);
6823 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6824 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6830 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6832 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6837 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6839 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6840 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6844 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6845 slp_node_instance
, false);
6851 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6852 /* For SLP vectorization we directly vectorize a subchain
6853 without permutation. */
6854 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6855 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6856 /* For BB vectorization always use the first stmt to base
6857 the data ref pointer on. */
6859 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6861 /* Check if the chain of loads is already vectorized. */
6862 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6863 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6864 ??? But we can only do so if there is exactly one
6865 as we have no way to get at the rest. Leave the CSE
6867 ??? With the group load eventually participating
6868 in multiple different permutations (having multiple
6869 slp nodes which refer to the same group) the CSE
6870 is even wrong code. See PR56270. */
6873 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6876 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6877 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6880 /* VEC_NUM is the number of vect stmts to be created for this group. */
6883 grouped_load
= false;
6884 /* For SLP permutation support we need to load the whole group,
6885 not only the number of vector stmts the permutation result
6888 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6890 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6891 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6894 vec_num
= group_size
;
6900 group_size
= vec_num
= 1;
6904 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6905 gcc_assert (alignment_support_scheme
);
6906 /* Targets with load-lane instructions must not require explicit
6908 gcc_assert (!load_lanes_p
6909 || alignment_support_scheme
== dr_aligned
6910 || alignment_support_scheme
== dr_unaligned_supported
);
6912 /* In case the vectorization factor (VF) is bigger than the number
6913 of elements that we can fit in a vectype (nunits), we have to generate
6914 more than one vector stmt - i.e - we need to "unroll" the
6915 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6916 from one copy of the vector stmt to the next, in the field
6917 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6918 stages to find the correct vector defs to be used when vectorizing
6919 stmts that use the defs of the current stmt. The example below
6920 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6921 need to create 4 vectorized stmts):
6923 before vectorization:
6924 RELATED_STMT VEC_STMT
6928 step 1: vectorize stmt S1:
6929 We first create the vector stmt VS1_0, and, as usual, record a
6930 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6931 Next, we create the vector stmt VS1_1, and record a pointer to
6932 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6933 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6935 RELATED_STMT VEC_STMT
6936 VS1_0: vx0 = memref0 VS1_1 -
6937 VS1_1: vx1 = memref1 VS1_2 -
6938 VS1_2: vx2 = memref2 VS1_3 -
6939 VS1_3: vx3 = memref3 - -
6940 S1: x = load - VS1_0
6943 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6944 information we recorded in RELATED_STMT field is used to vectorize
6947 /* In case of interleaving (non-unit grouped access):
6954 Vectorized loads are created in the order of memory accesses
6955 starting from the access of the first stmt of the chain:
6958 VS2: vx1 = &base + vec_size*1
6959 VS3: vx3 = &base + vec_size*2
6960 VS4: vx4 = &base + vec_size*3
6962 Then permutation statements are generated:
6964 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6965 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6968 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6969 (the order of the data-refs in the output of vect_permute_load_chain
6970 corresponds to the order of scalar stmts in the interleaving chain - see
6971 the documentation of vect_permute_load_chain()).
6972 The generation of permutation stmts and recording them in
6973 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6975 In case of both multiple types and interleaving, the vector loads and
6976 permutation stmts above are created for every copy. The result vector
6977 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6978 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6980 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6981 on a target that supports unaligned accesses (dr_unaligned_supported)
6982 we generate the following code:
6986 p = p + indx * vectype_size;
6991 Otherwise, the data reference is potentially unaligned on a target that
6992 does not support unaligned accesses (dr_explicit_realign_optimized) -
6993 then generate the following code, in which the data in each iteration is
6994 obtained by two vector loads, one from the previous iteration, and one
6995 from the current iteration:
6997 msq_init = *(floor(p1))
6998 p2 = initial_addr + VS - 1;
6999 realignment_token = call target_builtin;
7002 p2 = p2 + indx * vectype_size
7004 vec_dest = realign_load (msq, lsq, realignment_token)
7009 /* If the misalignment remains the same throughout the execution of the
7010 loop, we can create the init_addr and permutation mask at the loop
7011 preheader. Otherwise, it needs to be created inside the loop.
7012 This can only occur when vectorizing memory accesses in the inner-loop
7013 nested within an outer-loop that is being vectorized. */
7015 if (nested_in_vect_loop
7016 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7017 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7019 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7020 compute_in_loop
= true;
7023 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7024 || alignment_support_scheme
== dr_explicit_realign
)
7025 && !compute_in_loop
)
7027 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7028 alignment_support_scheme
, NULL_TREE
,
7030 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7032 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7033 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7041 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7044 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7046 aggr_type
= vectype
;
7048 prev_stmt_info
= NULL
;
7049 for (j
= 0; j
< ncopies
; j
++)
7051 /* 1. Create the vector or array pointer update chain. */
7054 bool simd_lane_access_p
7055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7056 if (simd_lane_access_p
7057 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7058 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7059 && integer_zerop (DR_OFFSET (first_dr
))
7060 && integer_zerop (DR_INIT (first_dr
))
7061 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7062 get_alias_set (DR_REF (first_dr
)))
7063 && (alignment_support_scheme
== dr_aligned
7064 || alignment_support_scheme
== dr_unaligned_supported
))
7066 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7067 dataref_offset
= build_int_cst (reference_alias_ptr_type
7068 (DR_REF (first_dr
)), 0);
7071 else if (first_stmt_for_drptr
7072 && first_stmt
!= first_stmt_for_drptr
)
7075 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7076 at_loop
, offset
, &dummy
, gsi
,
7077 &ptr_incr
, simd_lane_access_p
,
7078 &inv_p
, byte_offset
);
7079 /* Adjust the pointer by the difference to first_stmt. */
7080 data_reference_p ptrdr
7081 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7082 tree diff
= fold_convert (sizetype
,
7083 size_binop (MINUS_EXPR
,
7086 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7091 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7092 offset
, &dummy
, gsi
, &ptr_incr
,
7093 simd_lane_access_p
, &inv_p
,
7096 else if (dataref_offset
)
7097 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7098 TYPE_SIZE_UNIT (aggr_type
));
7100 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7101 TYPE_SIZE_UNIT (aggr_type
));
7103 if (grouped_load
|| slp_perm
)
7104 dr_chain
.create (vec_num
);
7110 vec_array
= create_vector_array (vectype
, vec_num
);
7113 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7114 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
7115 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7116 gimple_call_set_lhs (new_stmt
, vec_array
);
7117 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7119 /* Extract each vector into an SSA_NAME. */
7120 for (i
= 0; i
< vec_num
; i
++)
7122 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7124 dr_chain
.quick_push (new_temp
);
7127 /* Record the mapping between SSA_NAMEs and statements. */
7128 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7132 for (i
= 0; i
< vec_num
; i
++)
7135 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7138 /* 2. Create the vector-load in the loop. */
7139 switch (alignment_support_scheme
)
7142 case dr_unaligned_supported
:
7144 unsigned int align
, misalign
;
7147 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7150 : build_int_cst (reference_alias_ptr_type
7151 (DR_REF (first_dr
)), 0));
7152 align
= TYPE_ALIGN_UNIT (vectype
);
7153 if (alignment_support_scheme
== dr_aligned
)
7155 gcc_assert (aligned_access_p (first_dr
));
7158 else if (DR_MISALIGNMENT (first_dr
) == -1)
7160 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7161 align
= TYPE_ALIGN_UNIT (elem_type
);
7163 align
= (get_object_alignment (DR_REF (first_dr
))
7166 TREE_TYPE (data_ref
)
7167 = build_aligned_type (TREE_TYPE (data_ref
),
7168 align
* BITS_PER_UNIT
);
7172 TREE_TYPE (data_ref
)
7173 = build_aligned_type (TREE_TYPE (data_ref
),
7174 TYPE_ALIGN (elem_type
));
7175 misalign
= DR_MISALIGNMENT (first_dr
);
7177 if (dataref_offset
== NULL_TREE
7178 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7179 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7183 case dr_explicit_realign
:
7187 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7189 if (compute_in_loop
)
7190 msq
= vect_setup_realignment (first_stmt
, gsi
,
7192 dr_explicit_realign
,
7195 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7196 ptr
= copy_ssa_name (dataref_ptr
);
7198 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7199 new_stmt
= gimple_build_assign
7200 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7202 (TREE_TYPE (dataref_ptr
),
7203 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7204 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7206 = build2 (MEM_REF
, vectype
, ptr
,
7207 build_int_cst (reference_alias_ptr_type
7208 (DR_REF (first_dr
)), 0));
7209 vec_dest
= vect_create_destination_var (scalar_dest
,
7211 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7212 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7213 gimple_assign_set_lhs (new_stmt
, new_temp
);
7214 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7215 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7216 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7219 bump
= size_binop (MULT_EXPR
, vs
,
7220 TYPE_SIZE_UNIT (elem_type
));
7221 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7222 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7223 new_stmt
= gimple_build_assign
7224 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7227 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7228 ptr
= copy_ssa_name (ptr
, new_stmt
);
7229 gimple_assign_set_lhs (new_stmt
, ptr
);
7230 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7232 = build2 (MEM_REF
, vectype
, ptr
,
7233 build_int_cst (reference_alias_ptr_type
7234 (DR_REF (first_dr
)), 0));
7237 case dr_explicit_realign_optimized
:
7238 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7239 new_temp
= copy_ssa_name (dataref_ptr
);
7241 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7242 new_stmt
= gimple_build_assign
7243 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7245 (TREE_TYPE (dataref_ptr
),
7246 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7247 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7249 = build2 (MEM_REF
, vectype
, new_temp
,
7250 build_int_cst (reference_alias_ptr_type
7251 (DR_REF (first_dr
)), 0));
7256 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7257 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7258 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7259 gimple_assign_set_lhs (new_stmt
, new_temp
);
7260 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7262 /* 3. Handle explicit realignment if necessary/supported.
7264 vec_dest = realign_load (msq, lsq, realignment_token) */
7265 if (alignment_support_scheme
== dr_explicit_realign_optimized
7266 || alignment_support_scheme
== dr_explicit_realign
)
7268 lsq
= gimple_assign_lhs (new_stmt
);
7269 if (!realignment_token
)
7270 realignment_token
= dataref_ptr
;
7271 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7272 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7273 msq
, lsq
, realignment_token
);
7274 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7275 gimple_assign_set_lhs (new_stmt
, new_temp
);
7276 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7278 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7281 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7282 add_phi_arg (phi
, lsq
,
7283 loop_latch_edge (containing_loop
),
7289 /* 4. Handle invariant-load. */
7290 if (inv_p
&& !bb_vinfo
)
7292 gcc_assert (!grouped_load
);
7293 /* If we have versioned for aliasing or the loop doesn't
7294 have any data dependencies that would preclude this,
7295 then we are sure this is a loop invariant load and
7296 thus we can insert it on the preheader edge. */
7297 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7298 && !nested_in_vect_loop
7299 && hoist_defs_of_uses (stmt
, loop
))
7301 if (dump_enabled_p ())
7303 dump_printf_loc (MSG_NOTE
, vect_location
,
7304 "hoisting out of the vectorized "
7306 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7308 tree tem
= copy_ssa_name (scalar_dest
);
7309 gsi_insert_on_edge_immediate
7310 (loop_preheader_edge (loop
),
7311 gimple_build_assign (tem
,
7313 (gimple_assign_rhs1 (stmt
))));
7314 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7315 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7316 set_vinfo_for_stmt (new_stmt
,
7317 new_stmt_vec_info (new_stmt
, vinfo
));
7321 gimple_stmt_iterator gsi2
= *gsi
;
7323 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7325 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7331 tree perm_mask
= perm_mask_for_reverse (vectype
);
7332 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7333 perm_mask
, stmt
, gsi
);
7334 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7337 /* Collect vector loads and later create their permutation in
7338 vect_transform_grouped_load (). */
7339 if (grouped_load
|| slp_perm
)
7340 dr_chain
.quick_push (new_temp
);
7342 /* Store vector loads in the corresponding SLP_NODE. */
7343 if (slp
&& !slp_perm
)
7344 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7346 /* Bump the vector pointer to account for a gap or for excess
7347 elements loaded for a permuted SLP load. */
7348 if (group_gap_adj
!= 0)
7352 = wide_int_to_tree (sizetype
,
7353 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7354 group_gap_adj
, &ovf
));
7355 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7360 if (slp
&& !slp_perm
)
7365 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7366 slp_node_instance
, false))
7368 dr_chain
.release ();
7377 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7378 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7383 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7385 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7386 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7389 dr_chain
.release ();
7395 /* Function vect_is_simple_cond.
7398 LOOP - the loop that is being vectorized.
7399 COND - Condition that is checked for simple use.
7402 *COMP_VECTYPE - the vector type for the comparison.
7404 Returns whether a COND can be vectorized. Checks whether
7405 condition operands are supportable using vect_is_simple_use. */
7408 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7411 enum vect_def_type dt
;
7412 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7415 if (TREE_CODE (cond
) == SSA_NAME
7416 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7418 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7419 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7422 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7427 if (!COMPARISON_CLASS_P (cond
))
7430 lhs
= TREE_OPERAND (cond
, 0);
7431 rhs
= TREE_OPERAND (cond
, 1);
7433 if (TREE_CODE (lhs
) == SSA_NAME
)
7435 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7436 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7439 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7440 && TREE_CODE (lhs
) != FIXED_CST
)
7443 if (TREE_CODE (rhs
) == SSA_NAME
)
7445 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7446 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7449 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7450 && TREE_CODE (rhs
) != FIXED_CST
)
7453 if (vectype1
&& vectype2
7454 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7457 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7461 /* vectorizable_condition.
7463 Check if STMT is a conditional modify expression that can be vectorized.
7464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7465 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7468 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7469 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7470 else clause if it is 2).
7472 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7475 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7476 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7479 tree scalar_dest
= NULL_TREE
;
7480 tree vec_dest
= NULL_TREE
;
7481 tree cond_expr
, then_clause
, else_clause
;
7482 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7483 tree comp_vectype
= NULL_TREE
;
7484 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7485 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7488 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7489 enum vect_def_type dt
, dts
[4];
7491 enum tree_code code
;
7492 stmt_vec_info prev_stmt_info
= NULL
;
7494 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7495 vec
<tree
> vec_oprnds0
= vNULL
;
7496 vec
<tree
> vec_oprnds1
= vNULL
;
7497 vec
<tree
> vec_oprnds2
= vNULL
;
7498 vec
<tree
> vec_oprnds3
= vNULL
;
7500 bool masked
= false;
7502 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7505 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7507 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7510 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7511 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7515 /* FORNOW: not yet supported. */
7516 if (STMT_VINFO_LIVE_P (stmt_info
))
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7520 "value used after loop.\n");
7525 /* Is vectorizable conditional operation? */
7526 if (!is_gimple_assign (stmt
))
7529 code
= gimple_assign_rhs_code (stmt
);
7531 if (code
!= COND_EXPR
)
7534 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7535 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7536 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7541 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7543 gcc_assert (ncopies
>= 1);
7544 if (reduc_index
&& ncopies
> 1)
7545 return false; /* FORNOW */
7547 cond_expr
= gimple_assign_rhs1 (stmt
);
7548 then_clause
= gimple_assign_rhs2 (stmt
);
7549 else_clause
= gimple_assign_rhs3 (stmt
);
7551 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7556 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7559 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7563 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7566 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7569 masked
= !COMPARISON_CLASS_P (cond_expr
);
7570 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7572 if (vec_cmp_type
== NULL_TREE
)
7577 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7578 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7585 vec_oprnds0
.create (1);
7586 vec_oprnds1
.create (1);
7587 vec_oprnds2
.create (1);
7588 vec_oprnds3
.create (1);
7592 scalar_dest
= gimple_assign_lhs (stmt
);
7593 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7595 /* Handle cond expr. */
7596 for (j
= 0; j
< ncopies
; j
++)
7598 gassign
*new_stmt
= NULL
;
7603 auto_vec
<tree
, 4> ops
;
7604 auto_vec
<vec
<tree
>, 4> vec_defs
;
7607 ops
.safe_push (cond_expr
);
7610 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7611 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7613 ops
.safe_push (then_clause
);
7614 ops
.safe_push (else_clause
);
7615 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7616 vec_oprnds3
= vec_defs
.pop ();
7617 vec_oprnds2
= vec_defs
.pop ();
7619 vec_oprnds1
= vec_defs
.pop ();
7620 vec_oprnds0
= vec_defs
.pop ();
7623 vec_defs
.release ();
7631 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7633 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7639 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7640 stmt
, comp_vectype
);
7641 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7642 loop_vinfo
, >emp
, &dts
[0]);
7645 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7646 stmt
, comp_vectype
);
7647 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7648 loop_vinfo
, >emp
, &dts
[1]);
7650 if (reduc_index
== 1)
7651 vec_then_clause
= reduc_def
;
7654 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7656 vect_is_simple_use (then_clause
, loop_vinfo
,
7659 if (reduc_index
== 2)
7660 vec_else_clause
= reduc_def
;
7663 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7665 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7672 = vect_get_vec_def_for_stmt_copy (dts
[0],
7673 vec_oprnds0
.pop ());
7676 = vect_get_vec_def_for_stmt_copy (dts
[1],
7677 vec_oprnds1
.pop ());
7679 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7680 vec_oprnds2
.pop ());
7681 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7682 vec_oprnds3
.pop ());
7687 vec_oprnds0
.quick_push (vec_cond_lhs
);
7689 vec_oprnds1
.quick_push (vec_cond_rhs
);
7690 vec_oprnds2
.quick_push (vec_then_clause
);
7691 vec_oprnds3
.quick_push (vec_else_clause
);
7694 /* Arguments are ready. Create the new vector stmt. */
7695 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7697 vec_then_clause
= vec_oprnds2
[i
];
7698 vec_else_clause
= vec_oprnds3
[i
];
7701 vec_compare
= vec_cond_lhs
;
7704 vec_cond_rhs
= vec_oprnds1
[i
];
7705 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7706 vec_cond_lhs
, vec_cond_rhs
);
7708 new_temp
= make_ssa_name (vec_dest
);
7709 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
7710 vec_compare
, vec_then_clause
,
7712 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7714 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7721 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7723 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7725 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7728 vec_oprnds0
.release ();
7729 vec_oprnds1
.release ();
7730 vec_oprnds2
.release ();
7731 vec_oprnds3
.release ();
7736 /* vectorizable_comparison.
7738 Check if STMT is a comparison expression that can be vectorized.
7739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7740 comparison, put it in VEC_STMT, and insert it at GSI.
7742 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7745 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7746 gimple
**vec_stmt
, tree reduc_def
,
7749 tree lhs
, rhs1
, rhs2
;
7750 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7751 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7752 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7753 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7755 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7756 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7759 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7760 stmt_vec_info prev_stmt_info
= NULL
;
7762 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7763 vec
<tree
> vec_oprnds0
= vNULL
;
7764 vec
<tree
> vec_oprnds1
= vNULL
;
7769 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7772 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
7775 mask_type
= vectype
;
7776 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7781 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7783 gcc_assert (ncopies
>= 1);
7784 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7785 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7789 if (STMT_VINFO_LIVE_P (stmt_info
))
7791 if (dump_enabled_p ())
7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7793 "value used after loop.\n");
7797 if (!is_gimple_assign (stmt
))
7800 code
= gimple_assign_rhs_code (stmt
);
7802 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7805 rhs1
= gimple_assign_rhs1 (stmt
);
7806 rhs2
= gimple_assign_rhs2 (stmt
);
7808 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7809 &dts
[0], &vectype1
))
7812 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7813 &dts
[1], &vectype2
))
7816 if (vectype1
&& vectype2
7817 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7820 vectype
= vectype1
? vectype1
: vectype2
;
7822 /* Invariant comparison. */
7825 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
7826 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
7829 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
7832 /* Can't compare mask and non-mask types. */
7833 if (vectype1
&& vectype2
7834 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
7837 /* Boolean values may have another representation in vectors
7838 and therefore we prefer bit operations over comparison for
7839 them (which also works for scalar masks). We store opcodes
7840 to use in bitop1 and bitop2. Statement is vectorized as
7841 BITOP2 (rhs1 BITOP1 rhs2) or
7842 rhs1 BITOP2 (BITOP1 rhs2)
7843 depending on bitop1 and bitop2 arity. */
7844 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
7846 if (code
== GT_EXPR
)
7848 bitop1
= BIT_NOT_EXPR
;
7849 bitop2
= BIT_AND_EXPR
;
7851 else if (code
== GE_EXPR
)
7853 bitop1
= BIT_NOT_EXPR
;
7854 bitop2
= BIT_IOR_EXPR
;
7856 else if (code
== LT_EXPR
)
7858 bitop1
= BIT_NOT_EXPR
;
7859 bitop2
= BIT_AND_EXPR
;
7860 std::swap (rhs1
, rhs2
);
7862 else if (code
== LE_EXPR
)
7864 bitop1
= BIT_NOT_EXPR
;
7865 bitop2
= BIT_IOR_EXPR
;
7866 std::swap (rhs1
, rhs2
);
7870 bitop1
= BIT_XOR_EXPR
;
7871 if (code
== EQ_EXPR
)
7872 bitop2
= BIT_NOT_EXPR
;
7878 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7879 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
7881 if (bitop1
== NOP_EXPR
)
7882 return expand_vec_cmp_expr_p (vectype
, mask_type
);
7885 machine_mode mode
= TYPE_MODE (vectype
);
7888 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
7889 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7892 if (bitop2
!= NOP_EXPR
)
7894 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
7895 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7905 vec_oprnds0
.create (1);
7906 vec_oprnds1
.create (1);
7910 lhs
= gimple_assign_lhs (stmt
);
7911 mask
= vect_create_destination_var (lhs
, mask_type
);
7913 /* Handle cmp expr. */
7914 for (j
= 0; j
< ncopies
; j
++)
7916 gassign
*new_stmt
= NULL
;
7921 auto_vec
<tree
, 2> ops
;
7922 auto_vec
<vec
<tree
>, 2> vec_defs
;
7924 ops
.safe_push (rhs1
);
7925 ops
.safe_push (rhs2
);
7926 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7927 vec_oprnds1
= vec_defs
.pop ();
7928 vec_oprnds0
= vec_defs
.pop ();
7932 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
7933 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
7938 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
7939 vec_oprnds0
.pop ());
7940 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
7941 vec_oprnds1
.pop ());
7946 vec_oprnds0
.quick_push (vec_rhs1
);
7947 vec_oprnds1
.quick_push (vec_rhs2
);
7950 /* Arguments are ready. Create the new vector stmt. */
7951 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
7953 vec_rhs2
= vec_oprnds1
[i
];
7955 new_temp
= make_ssa_name (mask
);
7956 if (bitop1
== NOP_EXPR
)
7958 new_stmt
= gimple_build_assign (new_temp
, code
,
7959 vec_rhs1
, vec_rhs2
);
7960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7964 if (bitop1
== BIT_NOT_EXPR
)
7965 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
7967 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
7969 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7970 if (bitop2
!= NOP_EXPR
)
7972 tree res
= make_ssa_name (mask
);
7973 if (bitop2
== BIT_NOT_EXPR
)
7974 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
7976 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
7978 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7982 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7989 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7991 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7993 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7996 vec_oprnds0
.release ();
7997 vec_oprnds1
.release ();
8002 /* Make sure the statement is vectorizable. */
8005 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
8007 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8008 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8009 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8011 tree scalar_type
, vectype
;
8012 gimple
*pattern_stmt
;
8013 gimple_seq pattern_def_seq
;
8015 if (dump_enabled_p ())
8017 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8018 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8021 if (gimple_has_volatile_ops (stmt
))
8023 if (dump_enabled_p ())
8024 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8025 "not vectorized: stmt has volatile operands\n");
8030 /* Skip stmts that do not need to be vectorized. In loops this is expected
8032 - the COND_EXPR which is the loop exit condition
8033 - any LABEL_EXPRs in the loop
8034 - computations that are used only for array indexing or loop control.
8035 In basic blocks we only analyze statements that are a part of some SLP
8036 instance, therefore, all the statements are relevant.
8038 Pattern statement needs to be analyzed instead of the original statement
8039 if the original statement is not relevant. Otherwise, we analyze both
8040 statements. In basic blocks we are called from some SLP instance
8041 traversal, don't analyze pattern stmts instead, the pattern stmts
8042 already will be part of SLP instance. */
8044 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8045 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8046 && !STMT_VINFO_LIVE_P (stmt_info
))
8048 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8050 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8051 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8053 /* Analyze PATTERN_STMT instead of the original stmt. */
8054 stmt
= pattern_stmt
;
8055 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8056 if (dump_enabled_p ())
8058 dump_printf_loc (MSG_NOTE
, vect_location
,
8059 "==> examining pattern statement: ");
8060 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8065 if (dump_enabled_p ())
8066 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8071 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8074 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8075 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8077 /* Analyze PATTERN_STMT too. */
8078 if (dump_enabled_p ())
8080 dump_printf_loc (MSG_NOTE
, vect_location
,
8081 "==> examining pattern statement: ");
8082 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8085 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
8089 if (is_pattern_stmt_p (stmt_info
)
8091 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8093 gimple_stmt_iterator si
;
8095 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8097 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8098 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8099 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8101 /* Analyze def stmt of STMT if it's a pattern stmt. */
8102 if (dump_enabled_p ())
8104 dump_printf_loc (MSG_NOTE
, vect_location
,
8105 "==> examining pattern def statement: ");
8106 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8109 if (!vect_analyze_stmt (pattern_def_stmt
,
8110 need_to_vectorize
, node
))
8116 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8118 case vect_internal_def
:
8121 case vect_reduction_def
:
8122 case vect_nested_cycle
:
8123 gcc_assert (!bb_vinfo
8124 && (relevance
== vect_used_in_outer
8125 || relevance
== vect_used_in_outer_by_reduction
8126 || relevance
== vect_used_by_reduction
8127 || relevance
== vect_unused_in_scope
8128 || relevance
== vect_used_only_live
));
8131 case vect_induction_def
:
8132 case vect_constant_def
:
8133 case vect_external_def
:
8134 case vect_unknown_def_type
:
8141 gcc_assert (PURE_SLP_STMT (stmt_info
));
8143 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8144 if (dump_enabled_p ())
8146 dump_printf_loc (MSG_NOTE
, vect_location
,
8147 "get vectype for scalar type: ");
8148 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8149 dump_printf (MSG_NOTE
, "\n");
8152 vectype
= get_vectype_for_scalar_type (scalar_type
);
8155 if (dump_enabled_p ())
8157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8158 "not SLPed: unsupported data-type ");
8159 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8161 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8166 if (dump_enabled_p ())
8168 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8169 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8170 dump_printf (MSG_NOTE
, "\n");
8173 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8176 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8178 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8179 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8180 || (is_gimple_call (stmt
)
8181 && gimple_call_lhs (stmt
) == NULL_TREE
));
8182 *need_to_vectorize
= true;
8185 if (PURE_SLP_STMT (stmt_info
) && !node
)
8187 dump_printf_loc (MSG_NOTE
, vect_location
,
8188 "handled only by SLP analysis\n");
8194 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8195 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8196 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8197 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8198 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8199 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8200 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8201 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8202 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8203 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8204 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8205 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8206 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8210 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8211 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8212 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8213 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8214 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8215 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8216 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8217 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8218 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8219 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8224 if (dump_enabled_p ())
8226 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8227 "not vectorized: relevant stmt not ");
8228 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8229 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8238 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8239 need extra handling, except for vectorizable reductions. */
8240 if (STMT_VINFO_LIVE_P (stmt_info
)
8241 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8242 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
, -1, NULL
);
8246 if (dump_enabled_p ())
8248 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8249 "not vectorized: live stmt not ");
8250 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8251 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8261 /* Function vect_transform_stmt.
8263 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8266 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8267 bool *grouped_store
, slp_tree slp_node
,
8268 slp_instance slp_node_instance
)
8270 bool is_store
= false;
8271 gimple
*vec_stmt
= NULL
;
8272 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8275 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8276 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8278 switch (STMT_VINFO_TYPE (stmt_info
))
8280 case type_demotion_vec_info_type
:
8281 case type_promotion_vec_info_type
:
8282 case type_conversion_vec_info_type
:
8283 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8287 case induc_vec_info_type
:
8288 gcc_assert (!slp_node
);
8289 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8293 case shift_vec_info_type
:
8294 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8298 case op_vec_info_type
:
8299 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8303 case assignment_vec_info_type
:
8304 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8308 case load_vec_info_type
:
8309 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8314 case store_vec_info_type
:
8315 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8317 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8319 /* In case of interleaving, the whole chain is vectorized when the
8320 last store in the chain is reached. Store stmts before the last
8321 one are skipped, and there vec_stmt_info shouldn't be freed
8323 *grouped_store
= true;
8324 if (STMT_VINFO_VEC_STMT (stmt_info
))
8331 case condition_vec_info_type
:
8332 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8336 case comparison_vec_info_type
:
8337 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8341 case call_vec_info_type
:
8342 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8343 stmt
= gsi_stmt (*gsi
);
8344 if (is_gimple_call (stmt
)
8345 && gimple_call_internal_p (stmt
)
8346 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8350 case call_simd_clone_vec_info_type
:
8351 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8352 stmt
= gsi_stmt (*gsi
);
8355 case reduc_vec_info_type
:
8356 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8361 if (!STMT_VINFO_LIVE_P (stmt_info
))
8363 if (dump_enabled_p ())
8364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8365 "stmt not supported.\n");
8370 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8371 This would break hybrid SLP vectorization. */
8373 gcc_assert (!vec_stmt
8374 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8376 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8377 is being vectorized, but outside the immediately enclosing loop. */
8379 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8380 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8381 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8382 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8383 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8384 || STMT_VINFO_RELEVANT (stmt_info
) ==
8385 vect_used_in_outer_by_reduction
))
8387 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8388 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8389 imm_use_iterator imm_iter
;
8390 use_operand_p use_p
;
8394 if (dump_enabled_p ())
8395 dump_printf_loc (MSG_NOTE
, vect_location
,
8396 "Record the vdef for outer-loop vectorization.\n");
8398 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8399 (to be used when vectorizing outer-loop stmts that use the DEF of
8401 if (gimple_code (stmt
) == GIMPLE_PHI
)
8402 scalar_dest
= PHI_RESULT (stmt
);
8404 scalar_dest
= gimple_assign_lhs (stmt
);
8406 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8408 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8410 exit_phi
= USE_STMT (use_p
);
8411 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8416 /* Handle stmts whose DEF is used outside the loop-nest that is
8417 being vectorized. */
8422 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8424 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8425 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8426 && STMT_VINFO_TYPE (slp_stmt_info
) != reduc_vec_info_type
)
8428 done
= vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8434 else if (STMT_VINFO_LIVE_P (stmt_info
)
8435 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8437 done
= vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, &vec_stmt
);
8442 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8448 /* Remove a group of stores (for SLP or interleaving), free their
8452 vect_remove_stores (gimple
*first_stmt
)
8454 gimple
*next
= first_stmt
;
8456 gimple_stmt_iterator next_si
;
8460 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8462 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8463 if (is_pattern_stmt_p (stmt_info
))
8464 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8465 /* Free the attached stmt_vec_info and remove the stmt. */
8466 next_si
= gsi_for_stmt (next
);
8467 unlink_stmt_vdef (next
);
8468 gsi_remove (&next_si
, true);
8469 release_defs (next
);
8470 free_stmt_vec_info (next
);
8476 /* Function new_stmt_vec_info.
8478 Create and initialize a new stmt_vec_info struct for STMT. */
8481 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8484 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8486 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8487 STMT_VINFO_STMT (res
) = stmt
;
8489 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8490 STMT_VINFO_LIVE_P (res
) = false;
8491 STMT_VINFO_VECTYPE (res
) = NULL
;
8492 STMT_VINFO_VEC_STMT (res
) = NULL
;
8493 STMT_VINFO_VECTORIZABLE (res
) = true;
8494 STMT_VINFO_IN_PATTERN_P (res
) = false;
8495 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8496 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8497 STMT_VINFO_DATA_REF (res
) = NULL
;
8498 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8500 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8501 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8502 STMT_VINFO_DR_INIT (res
) = NULL
;
8503 STMT_VINFO_DR_STEP (res
) = NULL
;
8504 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
8506 if (gimple_code (stmt
) == GIMPLE_PHI
8507 && is_loop_header_bb_p (gimple_bb (stmt
)))
8508 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8510 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8512 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8513 STMT_SLP_TYPE (res
) = loop_vect
;
8514 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8516 GROUP_FIRST_ELEMENT (res
) = NULL
;
8517 GROUP_NEXT_ELEMENT (res
) = NULL
;
8518 GROUP_SIZE (res
) = 0;
8519 GROUP_STORE_COUNT (res
) = 0;
8520 GROUP_GAP (res
) = 0;
8521 GROUP_SAME_DR_STMT (res
) = NULL
;
8527 /* Create a hash table for stmt_vec_info. */
8530 init_stmt_vec_info_vec (void)
8532 gcc_assert (!stmt_vec_info_vec
.exists ());
8533 stmt_vec_info_vec
.create (50);
8537 /* Free hash table for stmt_vec_info. */
8540 free_stmt_vec_info_vec (void)
8544 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8546 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8547 gcc_assert (stmt_vec_info_vec
.exists ());
8548 stmt_vec_info_vec
.release ();
8552 /* Free stmt vectorization related info. */
8555 free_stmt_vec_info (gimple
*stmt
)
8557 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8562 /* Check if this statement has a related "pattern stmt"
8563 (introduced by the vectorizer during the pattern recognition
8564 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8566 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8568 stmt_vec_info patt_info
8569 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8572 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8573 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8574 gimple_set_bb (patt_stmt
, NULL
);
8575 tree lhs
= gimple_get_lhs (patt_stmt
);
8576 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8577 release_ssa_name (lhs
);
8580 gimple_stmt_iterator si
;
8581 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8583 gimple
*seq_stmt
= gsi_stmt (si
);
8584 gimple_set_bb (seq_stmt
, NULL
);
8585 lhs
= gimple_get_lhs (seq_stmt
);
8586 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8587 release_ssa_name (lhs
);
8588 free_stmt_vec_info (seq_stmt
);
8591 free_stmt_vec_info (patt_stmt
);
8595 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8596 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8597 set_vinfo_for_stmt (stmt
, NULL
);
8602 /* Function get_vectype_for_scalar_type_and_size.
8604 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8608 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8610 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8611 machine_mode simd_mode
;
8612 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8619 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8620 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8623 /* For vector types of elements whose mode precision doesn't
8624 match their types precision we use a element type of mode
8625 precision. The vectorization routines will have to make sure
8626 they support the proper result truncation/extension.
8627 We also make sure to build vector types with INTEGER_TYPE
8628 component type only. */
8629 if (INTEGRAL_TYPE_P (scalar_type
)
8630 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8631 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8632 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8633 TYPE_UNSIGNED (scalar_type
));
8635 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8636 When the component mode passes the above test simply use a type
8637 corresponding to that mode. The theory is that any use that
8638 would cause problems with this will disable vectorization anyway. */
8639 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8640 && !INTEGRAL_TYPE_P (scalar_type
))
8641 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8643 /* We can't build a vector type of elements with alignment bigger than
8645 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8646 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8647 TYPE_UNSIGNED (scalar_type
));
8649 /* If we felt back to using the mode fail if there was
8650 no scalar type for it. */
8651 if (scalar_type
== NULL_TREE
)
8654 /* If no size was supplied use the mode the target prefers. Otherwise
8655 lookup a vector mode of the specified size. */
8657 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8659 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
8660 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8664 vectype
= build_vector_type (scalar_type
, nunits
);
8666 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8667 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Vector size that get_vectype_for_scalar_type passes to
   get_vectype_for_scalar_type_and_size.  While it is 0,
   get_vectype_for_scalar_type sets it to the mode size of the first
   vector type it obtains.  */
8673 unsigned int current_vector_size
;
8675 /* Function get_vectype_for_scalar_type.
8677 Returns the vector type corresponding to SCALAR_TYPE as supported
8681 get_vectype_for_scalar_type (tree scalar_type
)
8684 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8685 current_vector_size
);
8687 && current_vector_size
== 0)
8688 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8692 /* Function get_mask_type_for_scalar_type.
8694 Returns the mask type corresponding to a result of comparison
8695 of vectors of specified SCALAR_TYPE as supported by target. */
8698 get_mask_type_for_scalar_type (tree scalar_type
)
8700 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
8705 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
8706 current_vector_size
);
8709 /* Function get_same_sized_vectype
8711 Returns a vector type corresponding to SCALAR_TYPE of size
8712 VECTOR_TYPE if supported by the target. */
8715 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8717 if (TREE_CODE (scalar_type
) == BOOLEAN_TYPE
)
8718 return build_same_sized_truth_vector_type (vector_type
);
8720 return get_vectype_for_scalar_type_and_size
8721 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8724 /* Function vect_is_simple_use.
8727 VINFO - the vect info of the loop or basic block that is being vectorized.
8728 OPERAND - operand in the loop or bb.
8730 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8731 DT - the type of definition
8733 Returns whether a stmt with OPERAND can be vectorized.
8734 For loops, supportable operands are constants, loop invariants, and operands
8735 that are defined by the current iteration of the loop. Unsupportable
8736 operands are those that are defined by a previous iteration of the loop (as
8737 is the case in reduction/induction computations).
8738 For basic blocks, supportable operands are constants and bb invariants.
8739 For now, operands defined outside the basic block are not supported. */
8742 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8743 gimple
**def_stmt
, enum vect_def_type
*dt
)
8746 *dt
= vect_unknown_def_type
;
8748 if (dump_enabled_p ())
8750 dump_printf_loc (MSG_NOTE
, vect_location
,
8751 "vect_is_simple_use: operand ");
8752 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8753 dump_printf (MSG_NOTE
, "\n");
8756 if (CONSTANT_CLASS_P (operand
))
8758 *dt
= vect_constant_def
;
8762 if (is_gimple_min_invariant (operand
))
8764 *dt
= vect_external_def
;
8768 if (TREE_CODE (operand
) != SSA_NAME
)
8770 if (dump_enabled_p ())
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8776 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8778 *dt
= vect_external_def
;
8782 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8783 if (dump_enabled_p ())
8785 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8786 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8789 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
8790 *dt
= vect_external_def
;
8793 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8794 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8797 if (dump_enabled_p ())
8799 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8802 case vect_uninitialized_def
:
8803 dump_printf (MSG_NOTE
, "uninitialized\n");
8805 case vect_constant_def
:
8806 dump_printf (MSG_NOTE
, "constant\n");
8808 case vect_external_def
:
8809 dump_printf (MSG_NOTE
, "external\n");
8811 case vect_internal_def
:
8812 dump_printf (MSG_NOTE
, "internal\n");
8814 case vect_induction_def
:
8815 dump_printf (MSG_NOTE
, "induction\n");
8817 case vect_reduction_def
:
8818 dump_printf (MSG_NOTE
, "reduction\n");
8820 case vect_double_reduction_def
:
8821 dump_printf (MSG_NOTE
, "double reduction\n");
8823 case vect_nested_cycle
:
8824 dump_printf (MSG_NOTE
, "nested cycle\n");
8826 case vect_unknown_def_type
:
8827 dump_printf (MSG_NOTE
, "unknown\n");
8832 if (*dt
== vect_unknown_def_type
)
8834 if (dump_enabled_p ())
8835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8836 "Unsupported pattern.\n");
8840 switch (gimple_code (*def_stmt
))
8847 if (dump_enabled_p ())
8848 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8849 "unsupported defining stmt:\n");
8856 /* Function vect_is_simple_use.
8858 Same as vect_is_simple_use but also determines the vector operand
8859 type of OPERAND and stores it to *VECTYPE. If the definition of
8860 OPERAND is vect_uninitialized_def, vect_constant_def or
8861 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8862 is responsible to compute the best suited vector type for the
8866 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8867 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
8869 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
8872 /* Now get a vector type if the def is internal, otherwise supply
8873 NULL_TREE and leave it up to the caller to figure out a proper
8874 type for the use stmt. */
8875 if (*dt
== vect_internal_def
8876 || *dt
== vect_induction_def
8877 || *dt
== vect_reduction_def
8878 || *dt
== vect_double_reduction_def
8879 || *dt
== vect_nested_cycle
)
8881 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8883 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8884 && !STMT_VINFO_RELEVANT (stmt_info
)
8885 && !STMT_VINFO_LIVE_P (stmt_info
))
8886 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8888 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8889 gcc_assert (*vectype
!= NULL_TREE
);
8891 else if (*dt
== vect_uninitialized_def
8892 || *dt
== vect_constant_def
8893 || *dt
== vect_external_def
)
8894 *vectype
= NULL_TREE
;
8902 /* Function supportable_widening_operation
8904 Check whether an operation represented by the code CODE is a
8905 widening operation that is supported by the target platform in
8906 vector form (i.e., when operating on arguments of type VECTYPE_IN
8907 producing a result of type VECTYPE_OUT).
8909 Widening operations we currently support are NOP (CONVERT), FLOAT
8910 and WIDEN_MULT. This function checks if these operations are supported
8911 by the target platform either directly (via vector tree-codes), or via
8915 - CODE1 and CODE2 are codes of vector operations to be used when
8916 vectorizing the operation, if available.
8917 - MULTI_STEP_CVT determines the number of required intermediate steps in
8918 case of multi-step conversion (like char->short->int - in that case
8919 MULTI_STEP_CVT will be 1).
8920 - INTERM_TYPES contains the intermediate type required to perform the
8921 widening operation (short in the above example). */
8924 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
8925 tree vectype_out
, tree vectype_in
,
8926 enum tree_code
*code1
, enum tree_code
*code2
,
8927 int *multi_step_cvt
,
8928 vec
<tree
> *interm_types
)
8930 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8931 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8932 struct loop
*vect_loop
= NULL
;
8933 machine_mode vec_mode
;
8934 enum insn_code icode1
, icode2
;
8935 optab optab1
, optab2
;
8936 tree vectype
= vectype_in
;
8937 tree wide_vectype
= vectype_out
;
8938 enum tree_code c1
, c2
;
8940 tree prev_type
, intermediate_type
;
8941 machine_mode intermediate_mode
, prev_mode
;
8942 optab optab3
, optab4
;
8944 *multi_step_cvt
= 0;
8946 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8950 case WIDEN_MULT_EXPR
:
8951 /* The result of a vectorized widening operation usually requires
8952 two vectors (because the widened results do not fit into one vector).
8953 The generated vector results would normally be expected to be
8954 generated in the same order as in the original scalar computation,
8955 i.e. if 8 results are generated in each vector iteration, they are
8956 to be organized as follows:
8957 vect1: [res1,res2,res3,res4],
8958 vect2: [res5,res6,res7,res8].
8960 However, in the special case that the result of the widening
8961 operation is used in a reduction computation only, the order doesn't
8962 matter (because when vectorizing a reduction we change the order of
8963 the computation). Some targets can take advantage of this and
8964 generate more efficient code. For example, targets like Altivec,
8965 that support widen_mult using a sequence of {mult_even,mult_odd}
8966 generate the following vectors:
8967 vect1: [res1,res3,res5,res7],
8968 vect2: [res2,res4,res6,res8].
8970 When vectorizing outer-loops, we execute the inner-loop sequentially
8971 (each vectorized inner-loop iteration contributes to VF outer-loop
8972 iterations in parallel). We therefore don't allow to change the
8973 order of the computation in the inner-loop during outer-loop
8975 /* TODO: Another case in which order doesn't *really* matter is when we
8976 widen and then contract again, e.g. (short)((int)x * y >> 8).
8977 Normally, pack_trunc performs an even/odd permute, whereas the
8978 repack from an even/odd expansion would be an interleave, which
8979 would be significantly simpler for e.g. AVX2. */
8980 /* In any case, in order to avoid duplicating the code below, recurse
8981 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8982 are properly set up for the caller. If we fail, we'll continue with
8983 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8985 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8986 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8987 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8988 stmt
, vectype_out
, vectype_in
,
8989 code1
, code2
, multi_step_cvt
,
8992 /* Elements in a vector with vect_used_by_reduction property cannot
8993 be reordered if the use chain with this property does not have the
8994 same operation. One such an example is s += a * b, where elements
8995 in a and b cannot be reordered. Here we check if the vector defined
8996 by STMT is only directly used in the reduction statement. */
8997 tree lhs
= gimple_assign_lhs (stmt
);
8998 use_operand_p dummy
;
9000 stmt_vec_info use_stmt_info
= NULL
;
9001 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9002 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9003 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9006 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9007 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9020 case VEC_WIDEN_MULT_EVEN_EXPR
:
9021 /* Support the recursion induced just above. */
9022 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9023 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9026 case WIDEN_LSHIFT_EXPR
:
9027 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9028 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9032 c1
= VEC_UNPACK_LO_EXPR
;
9033 c2
= VEC_UNPACK_HI_EXPR
;
9037 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9038 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9041 case FIX_TRUNC_EXPR
:
9042 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9043 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9044 computing the operation. */
9051 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9054 if (code
== FIX_TRUNC_EXPR
)
9056 /* The signedness is determined from output operand. */
9057 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9058 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9062 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9063 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9066 if (!optab1
|| !optab2
)
9069 vec_mode
= TYPE_MODE (vectype
);
9070 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9071 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
9077 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9078 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9079 /* For scalar masks we may have different boolean
9080 vector types having the same QImode. Thus we
9081 add additional check for elements number. */
9082 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9083 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9084 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9086 /* Check if it's a multi-step conversion that can be done using intermediate
9089 prev_type
= vectype
;
9090 prev_mode
= vec_mode
;
9092 if (!CONVERT_EXPR_CODE_P (code
))
9095 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9096 intermediate steps in promotion sequence. We try
9097 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9099 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9100 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9102 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9103 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9106 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9107 current_vector_size
);
9108 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9113 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9114 TYPE_UNSIGNED (prev_type
));
9116 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9117 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9119 if (!optab3
|| !optab4
9120 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9121 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9122 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9123 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9124 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9125 == CODE_FOR_nothing
)
9126 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9127 == CODE_FOR_nothing
))
9130 interm_types
->quick_push (intermediate_type
);
9131 (*multi_step_cvt
)++;
9133 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9134 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9135 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9136 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9137 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9139 prev_type
= intermediate_type
;
9140 prev_mode
= intermediate_mode
;
9143 interm_types
->release ();
9148 /* Function supportable_narrowing_operation
9150 Check whether an operation represented by the code CODE is a
9151 narrowing operation that is supported by the target platform in
9152 vector form (i.e., when operating on arguments of type VECTYPE_IN
9153 and producing a result of type VECTYPE_OUT).
9155 Narrowing operations we currently support are NOP (CONVERT) and
9156 FIX_TRUNC. This function checks if these operations are supported by
9157 the target platform directly via vector tree-codes.
9160 - CODE1 is the code of a vector operation to be used when
9161 vectorizing the operation, if available.
9162 - MULTI_STEP_CVT determines the number of required intermediate steps in
9163 case of multi-step conversion (like int->short->char - in that case
9164 MULTI_STEP_CVT will be 1).
9165 - INTERM_TYPES contains the intermediate type required to perform the
9166 narrowing operation (short in the above example). */
9169 supportable_narrowing_operation (enum tree_code code
,
9170 tree vectype_out
, tree vectype_in
,
9171 enum tree_code
*code1
, int *multi_step_cvt
,
9172 vec
<tree
> *interm_types
)
9174 machine_mode vec_mode
;
9175 enum insn_code icode1
;
9176 optab optab1
, interm_optab
;
9177 tree vectype
= vectype_in
;
9178 tree narrow_vectype
= vectype_out
;
9180 tree intermediate_type
, prev_type
;
9181 machine_mode intermediate_mode
, prev_mode
;
9185 *multi_step_cvt
= 0;
9189 c1
= VEC_PACK_TRUNC_EXPR
;
9192 case FIX_TRUNC_EXPR
:
9193 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9197 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9198 tree code and optabs used for computing the operation. */
9205 if (code
== FIX_TRUNC_EXPR
)
9206 /* The signedness is determined from output operand. */
9207 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9209 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9214 vec_mode
= TYPE_MODE (vectype
);
9215 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9220 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9221 /* For scalar masks we may have different boolean
9222 vector types having the same QImode. Thus we
9223 add additional check for elements number. */
9224 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9225 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9226 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9228 /* Check if it's a multi-step conversion that can be done using intermediate
9230 prev_mode
= vec_mode
;
9231 prev_type
= vectype
;
9232 if (code
== FIX_TRUNC_EXPR
)
9233 uns
= TYPE_UNSIGNED (vectype_out
);
9235 uns
= TYPE_UNSIGNED (vectype
);
9237 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9238 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9239 costly than signed. */
9240 if (code
== FIX_TRUNC_EXPR
&& uns
)
9242 enum insn_code icode2
;
9245 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9247 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9248 if (interm_optab
!= unknown_optab
9249 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9250 && insn_data
[icode1
].operand
[0].mode
9251 == insn_data
[icode2
].operand
[0].mode
)
9254 optab1
= interm_optab
;
9259 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9260 intermediate steps in promotion sequence. We try
9261 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9262 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9263 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9265 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9266 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9269 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9270 current_vector_size
);
9271 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9276 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9278 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9281 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9282 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9283 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9284 == CODE_FOR_nothing
))
9287 interm_types
->quick_push (intermediate_type
);
9288 (*multi_step_cvt
)++;
9290 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9291 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9292 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9293 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9295 prev_mode
= intermediate_mode
;
9296 prev_type
= intermediate_type
;
9297 optab1
= interm_optab
;
9300 interm_types
->release ();