1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,            /* A (possibly masked) load.  */
  VLS_STORE,           /* A store of a vectorized statement result.  */
  VLS_STORE_INVARIANT  /* A store of a loop-invariant value.  */
};
63 /* Return the vectorized type for the given statement. */
66 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
68 return STMT_VINFO_VECTYPE (stmt_info
);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
74 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
76 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
77 basic_block bb
= gimple_bb (stmt
);
78 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
84 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
86 return (bb
->loop_father
== loop
->inner
);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
94 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
95 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
96 int misalign
, enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
103 kind
= vector_scatter_store
;
106 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
107 stmt_info_for_cost si
= { count
, kind
,
108 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
110 body_cost_vec
->safe_push (si
);
112 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
115 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
116 count
, kind
, stmt_info
, misalign
, where
);
119 /* Return a variable of type ELEM_TYPE[NELEMS]. */
122 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
124 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
128 /* ARRAY is an array of vectors created by create_vector_array.
129 Return an SSA_NAME for the vector in index N. The reference
130 is part of the vectorization of STMT and the vector is associated
131 with scalar destination SCALAR_DEST. */
134 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
135 tree array
, unsigned HOST_WIDE_INT n
)
137 tree vect_type
, vect
, vect_name
, array_ref
;
140 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
141 vect_type
= TREE_TYPE (TREE_TYPE (array
));
142 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
143 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
144 build_int_cst (size_type_node
, n
),
145 NULL_TREE
, NULL_TREE
);
147 new_stmt
= gimple_build_assign (vect
, array_ref
);
148 vect_name
= make_ssa_name (vect
, new_stmt
);
149 gimple_assign_set_lhs (new_stmt
, vect_name
);
150 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
155 /* ARRAY is an array of vectors created by create_vector_array.
156 Emit code to store SSA_NAME VECT in index N of the array.
157 The store is part of the vectorization of STMT. */
160 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
161 tree array
, unsigned HOST_WIDE_INT n
)
166 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
167 build_int_cst (size_type_node
, n
),
168 NULL_TREE
, NULL_TREE
);
170 new_stmt
= gimple_build_assign (array_ref
, vect
);
171 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
174 /* PTR is a pointer to an array of type TYPE. Return a representation
175 of *PTR. The memory reference replaces those in FIRST_DR
179 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
183 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
184 /* Arrays have the same alignment as their type. */
185 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
189 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
191 /* Function vect_mark_relevant.
193 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
197 enum vect_relevant relevant
, bool live_p
)
199 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
200 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
201 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
202 gimple
*pattern_stmt
;
204 if (dump_enabled_p ())
206 dump_printf_loc (MSG_NOTE
, vect_location
,
207 "mark relevant %d, live %d: ", relevant
, live_p
);
208 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_NOTE
, vect_location
,
226 "last stmt in pattern. don't mark"
227 " relevant/live.\n");
228 stmt_info
= vinfo_for_stmt (pattern_stmt
);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
230 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
231 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
235 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
236 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
237 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
239 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
240 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE
, vect_location
,
244 "already marked relevant/live.\n");
248 worklist
->safe_push (stmt
);
252 /* Function is_simple_and_all_uses_invariant
254 Return true if STMT is simple and all uses of it are invariant. */
257 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
263 if (!is_gimple_assign (stmt
))
266 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
268 enum vect_def_type dt
= vect_uninitialized_def
;
270 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
274 "use not simple.\n");
278 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT in loop that is represented by LOOP_VINFO is
287 "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - control stmts in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
297 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
298 enum vect_relevant
*relevant
, bool *live_p
)
300 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
302 imm_use_iterator imm_iter
;
306 *relevant
= vect_unused_in_scope
;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt
)
311 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
312 != loop_exit_ctrl_vec_info_type
)
313 *relevant
= vect_used_in_scope
;
315 /* changing memory. */
316 if (gimple_code (stmt
) != GIMPLE_PHI
)
317 if (gimple_vdef (stmt
)
318 && !gimple_clobber_p (stmt
))
320 if (dump_enabled_p ())
321 dump_printf_loc (MSG_NOTE
, vect_location
,
322 "vec_stmt_relevant_p: stmt has vdefs.\n");
323 *relevant
= vect_used_in_scope
;
326 /* uses outside the loop. */
327 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
329 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
331 basic_block bb
= gimple_bb (USE_STMT (use_p
));
332 if (!flow_bb_inside_loop_p (loop
, bb
))
334 if (dump_enabled_p ())
335 dump_printf_loc (MSG_NOTE
, vect_location
,
336 "vec_stmt_relevant_p: used out of loop.\n");
338 if (is_gimple_debug (USE_STMT (use_p
)))
341 /* We expect all such uses to be in the loop exit phis
342 (because of loop closed form) */
343 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
344 gcc_assert (bb
== single_exit (loop
)->dest
);
351 if (*live_p
&& *relevant
== vect_unused_in_scope
352 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE
, vect_location
,
356 "vec_stmt_relevant_p: stmt live but not relevant.\n");
357 *relevant
= vect_used_only_live
;
360 return (*live_p
|| *relevant
);
364 /* Function exist_non_indexing_operands_for_use_p
366 USE is one of the uses attached to STMT. Check if USE is
367 used in STMT for anything other than indexing an array. */
370 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
373 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 if (!gimple_assign_copy_p (stmt
))
396 if (is_gimple_call (stmt
)
397 && gimple_call_internal_p (stmt
))
398 switch (gimple_call_internal_fn (stmt
))
401 operand
= gimple_call_arg (stmt
, 3);
406 operand
= gimple_call_arg (stmt
, 2);
416 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (stmt
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
450 skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
452 be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
461 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
462 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
463 stmt_vec_info dstmt_vinfo
;
464 basic_block bb
, def_bb
;
466 enum vect_def_type dt
;
468 /* case 1: we are only interested in uses that need to be vectorized. Uses
469 that are used for address computation are not considered relevant. */
470 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
473 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
477 "not vectorized: unsupported use in stmt.\n");
481 if (!def_stmt
|| gimple_nop_p (def_stmt
))
484 def_bb
= gimple_bb (def_stmt
);
485 if (!flow_bb_inside_loop_p (loop
, def_bb
))
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
492 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
493 DEF_STMT must have already been processed, because this should be the
494 only way that STMT, which is a reduction-phi, was put in the worklist,
495 as there should be no other uses for DEF_STMT in the loop. So we just
496 check that everything is as expected, and we are done. */
497 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
498 bb
= gimple_bb (stmt
);
499 if (gimple_code (stmt
) == GIMPLE_PHI
500 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
501 && gimple_code (def_stmt
) != GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
503 && bb
->loop_father
== def_bb
->loop_father
)
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "reduc-stmt defining reduc-phi in the same nest.\n");
508 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
509 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
510 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
511 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
512 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
516 /* case 3a: outer-loop stmt defining an inner-loop stmt:
517 outer-loop-header-bb:
523 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
525 if (dump_enabled_p ())
526 dump_printf_loc (MSG_NOTE
, vect_location
,
527 "outer-loop def-stmt defining inner-loop stmt.\n");
531 case vect_unused_in_scope
:
532 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
533 vect_used_in_scope
: vect_unused_in_scope
;
536 case vect_used_in_outer_by_reduction
:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
538 relevant
= vect_used_by_reduction
;
541 case vect_used_in_outer
:
542 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
543 relevant
= vect_used_in_scope
;
546 case vect_used_in_scope
:
554 /* case 3b: inner-loop stmt defining an outer-loop stmt:
555 outer-loop-header-bb:
559 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
561 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE
, vect_location
,
565 "inner-loop def-stmt defining outer-loop stmt.\n");
569 case vect_unused_in_scope
:
570 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
571 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
572 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
575 case vect_used_by_reduction
:
576 case vect_used_only_live
:
577 relevant
= vect_used_in_outer_by_reduction
;
580 case vect_used_in_scope
:
581 relevant
= vect_used_in_outer
;
588 /* We are also not interested in uses on loop PHI backedges that are
589 inductions. Otherwise we'll needlessly vectorize the IV increment
590 and cause hybrid SLP for SLP inductions. Unless the PHI is live
592 else if (gimple_code (stmt
) == GIMPLE_PHI
593 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
594 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
595 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
598 if (dump_enabled_p ())
599 dump_printf_loc (MSG_NOTE
, vect_location
,
600 "induction value on backedge.\n");
605 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
610 /* Function vect_mark_stmts_to_be_vectorized.
612 Not all stmts in the loop need to be vectorized. For example:
621 Stmt 1 and 3 do not need to be vectorized, because loop control and
622 addressing of vectorized data-refs are handled differently.
624 This pass detects such stmts. */
627 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
629 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
630 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
631 unsigned int nbbs
= loop
->num_nodes
;
632 gimple_stmt_iterator si
;
635 stmt_vec_info stmt_vinfo
;
639 enum vect_relevant relevant
;
641 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE
, vect_location
,
643 "=== vect_mark_stmts_to_be_vectorized ===\n");
645 auto_vec
<gimple
*, 64> worklist
;
647 /* 1. Init worklist. */
648 for (i
= 0; i
< nbbs
; i
++)
651 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
657 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
660 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
661 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
663 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
665 stmt
= gsi_stmt (si
);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
669 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
672 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
673 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
677 /* 2. Process_worklist */
678 while (worklist
.length () > 0)
683 stmt
= worklist
.pop ();
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
690 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
691 (DEF_STMT) as relevant/irrelevant according to the relevance property
693 stmt_vinfo
= vinfo_for_stmt (stmt
);
694 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
696 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
697 propagated as is to the DEF_STMTs of its USEs.
699 One exception is when STMT has been identified as defining a reduction
700 variable; in this case we set the relevance to vect_used_by_reduction.
701 This is because we distinguish between two kinds of relevant stmts -
702 those that are used by a reduction computation, and those that are
703 (also) used by a regular computation. This allows us later on to
704 identify stmts that are used solely by a reduction, and therefore the
705 order of the results that they produce does not have to be kept. */
707 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
709 case vect_reduction_def
:
710 gcc_assert (relevant
!= vect_unused_in_scope
);
711 if (relevant
!= vect_unused_in_scope
712 && relevant
!= vect_used_in_scope
713 && relevant
!= vect_used_by_reduction
714 && relevant
!= vect_used_only_live
)
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
718 "unsupported use of reduction.\n");
723 case vect_nested_cycle
:
724 if (relevant
!= vect_unused_in_scope
725 && relevant
!= vect_used_in_outer_by_reduction
726 && relevant
!= vect_used_in_outer
)
728 if (dump_enabled_p ())
729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
730 "unsupported use of nested cycle.\n");
736 case vect_double_reduction_def
:
737 if (relevant
!= vect_unused_in_scope
738 && relevant
!= vect_used_by_reduction
739 && relevant
!= vect_used_only_live
)
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
743 "unsupported use of double reduction.\n");
753 if (is_pattern_stmt_p (stmt_vinfo
))
755 /* Pattern statements are not inserted into the code, so
756 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
757 have to scan the RHS or function arguments instead. */
758 if (is_gimple_assign (stmt
))
760 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
761 tree op
= gimple_assign_rhs1 (stmt
);
764 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
766 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
767 relevant
, &worklist
, false)
768 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
769 relevant
, &worklist
, false))
773 for (; i
< gimple_num_ops (stmt
); i
++)
775 op
= gimple_op (stmt
, i
);
776 if (TREE_CODE (op
) == SSA_NAME
777 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
782 else if (is_gimple_call (stmt
))
784 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
786 tree arg
= gimple_call_arg (stmt
, i
);
787 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
794 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
796 tree op
= USE_FROM_PTR (use_p
);
797 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
802 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
804 gather_scatter_info gs_info
;
805 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
807 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
811 } /* while worklist */
817 /* Function vect_model_simple_cost.
819 Models cost for simple operations, i.e. those that only emit ncopies of a
820 single op. Right now, this does not account for multiple insns that could
821 be generated for the single vector op. We will handle that shortly. */
824 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
825 enum vect_def_type
*dt
,
827 stmt_vector_for_cost
*prologue_cost_vec
,
828 stmt_vector_for_cost
*body_cost_vec
)
831 int inside_cost
= 0, prologue_cost
= 0;
833 /* The SLP costs were already calculated during SLP tree build. */
834 if (PURE_SLP_STMT (stmt_info
))
837 /* Cost the "broadcast" of a scalar operand in to a vector operand.
838 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
840 for (i
= 0; i
< ndts
; i
++)
841 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
842 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
843 stmt_info
, 0, vect_prologue
);
845 /* Pass the inside-of-loop statements to the target-specific cost model. */
846 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
847 stmt_info
, 0, vect_body
);
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE
, vect_location
,
851 "vect_model_simple_cost: inside_cost = %d, "
852 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
856 /* Model cost for type demotion and promotion operations. PWR is normally
857 zero for single-step promotions and demotions. It will be one if
858 two-step promotion/demotion is required, and so on. Each additional
859 step doubles the number of instructions required. */
862 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
863 enum vect_def_type
*dt
, int pwr
)
866 int inside_cost
= 0, prologue_cost
= 0;
867 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
868 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
869 void *target_cost_data
;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info
))
876 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
878 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
880 for (i
= 0; i
< pwr
+ 1; i
++)
882 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
884 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
885 vec_promote_demote
, stmt_info
, 0,
889 /* FORNOW: Assuming maximum 2 args per stmts. */
890 for (i
= 0; i
< 2; i
++)
891 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
892 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
893 stmt_info
, 0, vect_prologue
);
895 if (dump_enabled_p ())
896 dump_printf_loc (MSG_NOTE
, vect_location
,
897 "vect_model_promotion_demotion_cost: inside_cost = %d, "
898 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
901 /* Function vect_model_store_cost
903 Models cost for stores. In the case of grouped accesses, one access
904 has the overhead of the grouped access attributed to it. */
907 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
908 vect_memory_access_type memory_access_type
,
909 enum vect_def_type dt
, slp_tree slp_node
,
910 stmt_vector_for_cost
*prologue_cost_vec
,
911 stmt_vector_for_cost
*body_cost_vec
)
913 unsigned int inside_cost
= 0, prologue_cost
= 0;
914 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
915 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
916 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
918 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
919 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
920 stmt_info
, 0, vect_prologue
);
922 /* Grouped stores update all elements in the group at once,
923 so we want the DR for the first statement. */
924 if (!slp_node
&& grouped_access_p
)
926 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
927 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
930 /* True if we should include any once-per-group costs as well as
931 the cost of the statement itself. For SLP we only get called
932 once per group anyhow. */
933 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
935 /* We assume that the cost of a single store-lanes instruction is
936 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
937 access is instead being provided by a permute-and-store operation,
938 include the cost of the permutes. */
940 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
942 /* Uses a high and low interleave or shuffle operations for each
944 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
945 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
946 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
947 stmt_info
, 0, vect_body
);
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE
, vect_location
,
951 "vect_model_store_cost: strided group_size = %d .\n",
955 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
956 /* Costs of the stores. */
957 if (memory_access_type
== VMAT_ELEMENTWISE
958 || memory_access_type
== VMAT_GATHER_SCATTER
)
959 /* N scalar stores plus extracting the elements. */
960 inside_cost
+= record_stmt_cost (body_cost_vec
,
961 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
962 scalar_store
, stmt_info
, 0, vect_body
);
964 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
966 if (memory_access_type
== VMAT_ELEMENTWISE
967 || memory_access_type
== VMAT_STRIDED_SLP
)
968 inside_cost
+= record_stmt_cost (body_cost_vec
,
969 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
970 vec_to_scalar
, stmt_info
, 0, vect_body
);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE
, vect_location
,
974 "vect_model_store_cost: inside_cost = %d, "
975 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
979 /* Calculate cost of DR's memory access. */
981 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
982 unsigned int *inside_cost
,
983 stmt_vector_for_cost
*body_cost_vec
)
985 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
986 gimple
*stmt
= DR_STMT (dr
);
987 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
989 switch (alignment_support_scheme
)
993 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
994 vector_store
, stmt_info
, 0,
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE
, vect_location
,
999 "vect_model_store_cost: aligned.\n");
1003 case dr_unaligned_supported
:
1005 /* Here, we assign an additional cost for the unaligned store. */
1006 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1007 unaligned_store
, stmt_info
,
1008 DR_MISALIGNMENT (dr
), vect_body
);
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE
, vect_location
,
1011 "vect_model_store_cost: unaligned supported by "
1016 case dr_unaligned_unsupported
:
1018 *inside_cost
= VECT_MAX_COST
;
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1022 "vect_model_store_cost: unsupported access.\n");
1032 /* Function vect_model_load_cost
1034 Models cost for loads. In the case of grouped accesses, one access has
1035 the overhead of the grouped access attributed to it. Since unaligned
1036 accesses are supported for loads, we also account for the costs of the
1037 access scheme chosen. */
1040 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1041 vect_memory_access_type memory_access_type
,
1043 stmt_vector_for_cost
*prologue_cost_vec
,
1044 stmt_vector_for_cost
*body_cost_vec
)
1046 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1047 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1048 unsigned int inside_cost
= 0, prologue_cost
= 0;
1049 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1051 /* Grouped loads read all elements in the group at once,
1052 so we want the DR for the first statement. */
1053 if (!slp_node
&& grouped_access_p
)
1055 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1056 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1059 /* True if we should include any once-per-group costs as well as
1060 the cost of the statement itself. For SLP we only get called
1061 once per group anyhow. */
1062 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1064 /* We assume that the cost of a single load-lanes instruction is
1065 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1066 access is instead being provided by a load-and-permute operation,
1067 include the cost of the permutes. */
1069 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1071 /* Uses an even and odd extract operations or shuffle operations
1072 for each needed permute. */
1073 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1074 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1075 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1076 stmt_info
, 0, vect_body
);
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE
, vect_location
,
1080 "vect_model_load_cost: strided group_size = %d .\n",
1084 /* The loads themselves. */
1085 if (memory_access_type
== VMAT_ELEMENTWISE
1086 || memory_access_type
== VMAT_GATHER_SCATTER
)
1088 /* N scalar loads plus gathering them into a vector. */
1089 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1090 inside_cost
+= record_stmt_cost (body_cost_vec
,
1091 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1092 scalar_load
, stmt_info
, 0, vect_body
);
1095 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1096 &inside_cost
, &prologue_cost
,
1097 prologue_cost_vec
, body_cost_vec
, true);
1098 if (memory_access_type
== VMAT_ELEMENTWISE
1099 || memory_access_type
== VMAT_STRIDED_SLP
)
1100 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1101 stmt_info
, 0, vect_body
);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE
, vect_location
,
1105 "vect_model_load_cost: inside_cost = %d, "
1106 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1110 /* Calculate cost of DR's memory access. */
1112 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1113 bool add_realign_cost
, unsigned int *inside_cost
,
1114 unsigned int *prologue_cost
,
1115 stmt_vector_for_cost
*prologue_cost_vec
,
1116 stmt_vector_for_cost
*body_cost_vec
,
1117 bool record_prologue_costs
)
1119 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1120 gimple
*stmt
= DR_STMT (dr
);
1121 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1123 switch (alignment_support_scheme
)
1127 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1128 stmt_info
, 0, vect_body
);
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE
, vect_location
,
1132 "vect_model_load_cost: aligned.\n");
1136 case dr_unaligned_supported
:
1138 /* Here, we assign an additional cost for the unaligned load. */
1139 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1140 unaligned_load
, stmt_info
,
1141 DR_MISALIGNMENT (dr
), vect_body
);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE
, vect_location
,
1145 "vect_model_load_cost: unaligned supported by "
1150 case dr_explicit_realign
:
1152 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1153 vector_load
, stmt_info
, 0, vect_body
);
1154 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1155 vec_perm
, stmt_info
, 0, vect_body
);
1157 /* FIXME: If the misalignment remains fixed across the iterations of
1158 the containing loop, the following cost should be added to the
1160 if (targetm
.vectorize
.builtin_mask_for_load
)
1161 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1162 stmt_info
, 0, vect_body
);
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE
, vect_location
,
1166 "vect_model_load_cost: explicit realign\n");
1170 case dr_explicit_realign_optimized
:
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE
, vect_location
,
1174 "vect_model_load_cost: unaligned software "
1177 /* Unaligned software pipeline has a load of an address, an initial
1178 load, and possibly a mask operation to "prime" the loop. However,
1179 if this is an access in a group of loads, which provide grouped
1180 access, then the above cost should only be considered for one
1181 access in the group. Inside the loop, there is a load op
1182 and a realignment op. */
1184 if (add_realign_cost
&& record_prologue_costs
)
1186 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1187 vector_stmt
, stmt_info
,
1189 if (targetm
.vectorize
.builtin_mask_for_load
)
1190 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1191 vector_stmt
, stmt_info
,
1195 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1196 stmt_info
, 0, vect_body
);
1197 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1198 stmt_info
, 0, vect_body
);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE
, vect_location
,
1202 "vect_model_load_cost: explicit realign optimized"
1208 case dr_unaligned_unsupported
:
1210 *inside_cost
= VECT_MAX_COST
;
1212 if (dump_enabled_p ())
1213 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1214 "vect_model_load_cost: unsupported access.\n");
1223 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1224 the loop preheader for the vectorized stmt STMT. */
1227 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1230 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1233 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1234 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1238 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1242 if (nested_in_vect_loop_p (loop
, stmt
))
1245 pe
= loop_preheader_edge (loop
);
1246 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1247 gcc_assert (!new_bb
);
1251 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1253 gimple_stmt_iterator gsi_bb_start
;
1255 gcc_assert (bb_vinfo
);
1256 bb
= BB_VINFO_BB (bb_vinfo
);
1257 gsi_bb_start
= gsi_after_labels (bb
);
1258 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1262 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE
, vect_location
,
1265 "created new init_stmt: ");
1266 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1270 /* Function vect_init_vector.
1272 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1273 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1274 vector type a vector with all elements equal to VAL is created first.
1275 Place the initialization at BSI if it is not NULL. Otherwise, place the
1276 initialization at the loop preheader.
1277 Return the DEF of INIT_STMT.
1278 It will be used in the vectorization of STMT. */
1281 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1286 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1287 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1289 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1290 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1292 /* Scalar boolean value should be transformed into
1293 all zeros or all ones value before building a vector. */
1294 if (VECTOR_BOOLEAN_TYPE_P (type
))
1296 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1297 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1299 if (CONSTANT_CLASS_P (val
))
1300 val
= integer_zerop (val
) ? false_val
: true_val
;
1303 new_temp
= make_ssa_name (TREE_TYPE (type
));
1304 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1305 val
, true_val
, false_val
);
1306 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1310 else if (CONSTANT_CLASS_P (val
))
1311 val
= fold_convert (TREE_TYPE (type
), val
);
1314 new_temp
= make_ssa_name (TREE_TYPE (type
));
1315 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1316 init_stmt
= gimple_build_assign (new_temp
,
1317 fold_build1 (VIEW_CONVERT_EXPR
,
1321 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1322 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1326 val
= build_vector_from_val (type
, val
);
1329 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1330 init_stmt
= gimple_build_assign (new_temp
, val
);
1331 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1335 /* Function vect_get_vec_def_for_operand_1.
1337 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1338 DT that will be used in the vectorized stmt. */
1341 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1345 stmt_vec_info def_stmt_info
= NULL
;
1349 /* operand is a constant or a loop invariant. */
1350 case vect_constant_def
:
1351 case vect_external_def
:
1352 /* Code should use vect_get_vec_def_for_operand. */
1355 /* operand is defined inside the loop. */
1356 case vect_internal_def
:
1358 /* Get the def from the vectorized stmt. */
1359 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1361 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1362 /* Get vectorized pattern statement. */
1364 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1365 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1366 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1367 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1368 gcc_assert (vec_stmt
);
1369 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1370 vec_oprnd
= PHI_RESULT (vec_stmt
);
1371 else if (is_gimple_call (vec_stmt
))
1372 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1374 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1378 /* operand is defined by a loop header phi. */
1379 case vect_reduction_def
:
1380 case vect_double_reduction_def
:
1381 case vect_nested_cycle
:
1382 case vect_induction_def
:
1384 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1386 /* Get the def from the vectorized stmt. */
1387 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1388 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1389 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1390 vec_oprnd
= PHI_RESULT (vec_stmt
);
1392 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1402 /* Function vect_get_vec_def_for_operand.
1404 OP is an operand in STMT. This function returns a (vector) def that will be
1405 used in the vectorized stmt for STMT.
1407 In the case that OP is an SSA_NAME which is defined in the loop, then
1408 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 In case OP is an invariant or constant, a new stmt that creates a vector def
1411 needs to be introduced. VECTYPE may be used to specify a required type for
1412 vector invariant. */
1415 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1418 enum vect_def_type dt
;
1420 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1421 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1423 if (dump_enabled_p ())
1425 dump_printf_loc (MSG_NOTE
, vect_location
,
1426 "vect_get_vec_def_for_operand: ");
1427 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1428 dump_printf (MSG_NOTE
, "\n");
1431 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1432 gcc_assert (is_simple_use
);
1433 if (def_stmt
&& dump_enabled_p ())
1435 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1436 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1439 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1441 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1445 vector_type
= vectype
;
1446 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1447 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1448 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1450 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1452 gcc_assert (vector_type
);
1453 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1456 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1460 /* Function vect_get_vec_def_for_stmt_copy
1462 Return a vector-def for an operand. This function is used when the
1463 vectorized stmt to be created (by the caller to this function) is a "copy"
1464 created in case the vectorized result cannot fit in one vector, and several
1465 copies of the vector-stmt are required. In this case the vector-def is
1466 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1467 of the stmt that defines VEC_OPRND.
1468 DT is the type of the vector def VEC_OPRND.
1471 In case the vectorization factor (VF) is bigger than the number
1472 of elements that can fit in a vectype (nunits), we have to generate
1473 more than one vector stmt to vectorize the scalar stmt. This situation
1474 arises when there are multiple data-types operated upon in the loop; the
1475 smallest data-type determines the VF, and as a result, when vectorizing
1476 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1477 vector stmt (each computing a vector of 'nunits' results, and together
1478 computing 'VF' results in each iteration). This function is called when
1479 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1480 which VF=16 and nunits=4, so the number of copies required is 4):
1482 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1484 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1485 VS1.1: vx.1 = memref1 VS1.2
1486 VS1.2: vx.2 = memref2 VS1.3
1487 VS1.3: vx.3 = memref3
1489 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1490 VSnew.1: vz1 = vx.1 + ... VSnew.2
1491 VSnew.2: vz2 = vx.2 + ... VSnew.3
1492 VSnew.3: vz3 = vx.3 + ...
1494 The vectorization of S1 is explained in vectorizable_load.
1495 The vectorization of S2:
1496 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1497 the function 'vect_get_vec_def_for_operand' is called to
1498 get the relevant vector-def for each operand of S2. For operand x it
1499 returns the vector-def 'vx.0'.
1501 To create the remaining copies of the vector-stmt (VSnew.j), this
1502 function is called to get the relevant vector-def for each operand. It is
1503 obtained from the respective VS1.j stmt, which is recorded in the
1504 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 For example, to obtain the vector-def 'vx.1' in order to create the
1507 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1508 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1509 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1510 and return its def ('vx.1').
1511 Overall, to create the above sequence this function will be called 3 times:
1512 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1513 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1514 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1517 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1519 gimple
*vec_stmt_for_operand
;
1520 stmt_vec_info def_stmt_info
;
1522 /* Do nothing; can reuse same def. */
1523 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1526 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1527 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1528 gcc_assert (def_stmt_info
);
1529 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1530 gcc_assert (vec_stmt_for_operand
);
1531 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1532 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1534 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1539 /* Get vectorized definitions for the operands to create a copy of an original
1540 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1543 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1544 vec
<tree
> *vec_oprnds0
,
1545 vec
<tree
> *vec_oprnds1
)
1547 tree vec_oprnd
= vec_oprnds0
->pop ();
1549 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1550 vec_oprnds0
->quick_push (vec_oprnd
);
1552 if (vec_oprnds1
&& vec_oprnds1
->length ())
1554 vec_oprnd
= vec_oprnds1
->pop ();
1555 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1556 vec_oprnds1
->quick_push (vec_oprnd
);
1561 /* Get vectorized definitions for OP0 and OP1. */
1564 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1565 vec
<tree
> *vec_oprnds0
,
1566 vec
<tree
> *vec_oprnds1
,
1571 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1572 auto_vec
<tree
> ops (nops
);
1573 auto_vec
<vec
<tree
> > vec_defs (nops
);
1575 ops
.quick_push (op0
);
1577 ops
.quick_push (op1
);
1579 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1581 *vec_oprnds0
= vec_defs
[0];
1583 *vec_oprnds1
= vec_defs
[1];
1589 vec_oprnds0
->create (1);
1590 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1591 vec_oprnds0
->quick_push (vec_oprnd
);
1595 vec_oprnds1
->create (1);
1596 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1597 vec_oprnds1
->quick_push (vec_oprnd
);
1603 /* Function vect_finish_stmt_generation.
1605 Insert a new stmt. */
1608 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1609 gimple_stmt_iterator
*gsi
)
1611 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1612 vec_info
*vinfo
= stmt_info
->vinfo
;
1614 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1616 if (!gsi_end_p (*gsi
)
1617 && gimple_has_mem_ops (vec_stmt
))
1619 gimple
*at_stmt
= gsi_stmt (*gsi
);
1620 tree vuse
= gimple_vuse (at_stmt
);
1621 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1623 tree vdef
= gimple_vdef (at_stmt
);
1624 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1625 /* If we have an SSA vuse and insert a store, update virtual
1626 SSA form to avoid triggering the renamer. Do so only
1627 if we can easily see all uses - which is what almost always
1628 happens with the way vectorized stmts are inserted. */
1629 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1630 && ((is_gimple_assign (vec_stmt
)
1631 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1632 || (is_gimple_call (vec_stmt
)
1633 && !(gimple_call_flags (vec_stmt
)
1634 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1636 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1637 gimple_set_vdef (vec_stmt
, new_vdef
);
1638 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1642 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1644 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1646 if (dump_enabled_p ())
1648 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1649 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1652 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1654 /* While EH edges will generally prevent vectorization, stmt might
1655 e.g. be in a must-not-throw region. Ensure newly created stmts
1656 that could throw are part of the same region. */
1657 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1658 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1659 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1662 /* We want to vectorize a call to combined function CFN with function
1663 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1664 as the types of all inputs. Check whether this is possible using
1665 an internal function, returning its code if so or IFN_LAST if not. */
1668 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1669 tree vectype_out
, tree vectype_in
)
1672 if (internal_fn_p (cfn
))
1673 ifn
= as_internal_fn (cfn
);
1675 ifn
= associated_internal_fn (fndecl
);
1676 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1678 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1679 if (info
.vectorizable
)
1681 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1682 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1683 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1684 OPTIMIZE_FOR_SPEED
))
1692 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1693 gimple_stmt_iterator
*);
1695 /* STMT is a non-strided load or store, meaning that it accesses
1696 elements with a known constant step. Return -1 if that step
1697 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1700 compare_step_with_zero (gimple
*stmt
)
1702 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1703 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1704 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
1708 /* If the target supports a permute mask that reverses the elements in
1709 a vector of type VECTYPE, return that mask, otherwise return null. */
1712 perm_mask_for_reverse (tree vectype
)
1716 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1718 auto_vec_perm_indices
sel (nunits
);
1719 for (i
= 0; i
< nunits
; ++i
)
1720 sel
.quick_push (nunits
- 1 - i
);
1722 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, &sel
))
1724 return vect_gen_perm_mask_checked (vectype
, sel
);
1727 /* A subroutine of get_load_store_type, with a subset of the same
1728 arguments. Handle the case where STMT is part of a grouped load
1731 For stores, the statements in the group are all consecutive
1732 and there is no gap at the end. For loads, the statements in the
1733 group might not be consecutive; there can be gaps between statements
1734 as well as at the end. */
1737 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1738 vec_load_store_type vls_type
,
1739 vect_memory_access_type
*memory_access_type
)
1741 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1742 vec_info
*vinfo
= stmt_info
->vinfo
;
1743 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1744 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1745 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1746 data_reference
*first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1747 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1748 bool single_element_p
= (stmt
== first_stmt
1749 && !GROUP_NEXT_ELEMENT (stmt_info
));
1750 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1751 unsigned nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1753 /* True if the vectorized statements would access beyond the last
1754 statement in the group. */
1755 bool overrun_p
= false;
1757 /* True if we can cope with such overrun by peeling for gaps, so that
1758 there is at least one final scalar iteration after the vector loop. */
1759 bool can_overrun_p
= (vls_type
== VLS_LOAD
&& loop_vinfo
&& !loop
->inner
);
1761 /* There can only be a gap at the end of the group if the stride is
1762 known at compile time. */
1763 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1765 /* Stores can't yet have gaps. */
1766 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
1770 if (STMT_VINFO_STRIDED_P (stmt_info
))
1772 /* Try to use consecutive accesses of GROUP_SIZE elements,
1773 separated by the stride, until we have a complete vector.
1774 Fall back to scalar accesses if that isn't possible. */
1775 if (nunits
% group_size
== 0)
1776 *memory_access_type
= VMAT_STRIDED_SLP
;
1778 *memory_access_type
= VMAT_ELEMENTWISE
;
1782 overrun_p
= loop_vinfo
&& gap
!= 0;
1783 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1786 "Grouped store with gaps requires"
1787 " non-consecutive accesses\n");
1790 /* An overrun is fine if the trailing elements are smaller
1791 than the alignment boundary B. Every vector access will
1792 be a multiple of B and so we are guaranteed to access a
1793 non-gap element in the same B-sized block. */
1795 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1796 / vect_get_scalar_dr_size (first_dr
)))
1798 if (overrun_p
&& !can_overrun_p
)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1802 "Peeling for outer loop is not supported\n");
1805 *memory_access_type
= VMAT_CONTIGUOUS
;
1810 /* We can always handle this case using elementwise accesses,
1811 but see if something more efficient is available. */
1812 *memory_access_type
= VMAT_ELEMENTWISE
;
1814 /* If there is a gap at the end of the group then these optimizations
1815 would access excess elements in the last iteration. */
1816 bool would_overrun_p
= (gap
!= 0);
1817 /* An overrun is fine if the trailing elements are smaller than the
1818 alignment boundary B. Every vector access will be a multiple of B
1819 and so we are guaranteed to access a non-gap element in the
1820 same B-sized block. */
1822 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1823 / vect_get_scalar_dr_size (first_dr
)))
1824 would_overrun_p
= false;
1826 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1827 && (can_overrun_p
|| !would_overrun_p
)
1828 && compare_step_with_zero (stmt
) > 0)
1830 /* First try using LOAD/STORE_LANES. */
1831 if (vls_type
== VLS_LOAD
1832 ? vect_load_lanes_supported (vectype
, group_size
)
1833 : vect_store_lanes_supported (vectype
, group_size
))
1835 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
1836 overrun_p
= would_overrun_p
;
1839 /* If that fails, try using permuting loads. */
1840 if (*memory_access_type
== VMAT_ELEMENTWISE
1841 && (vls_type
== VLS_LOAD
1842 ? vect_grouped_load_supported (vectype
, single_element_p
,
1844 : vect_grouped_store_supported (vectype
, group_size
)))
1846 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
1847 overrun_p
= would_overrun_p
;
1852 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
1854 /* STMT is the leader of the group. Check the operands of all the
1855 stmts of the group. */
1856 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
1859 gcc_assert (gimple_assign_single_p (next_stmt
));
1860 tree op
= gimple_assign_rhs1 (next_stmt
);
1862 enum vect_def_type dt
;
1863 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1867 "use not simple.\n");
1870 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
1876 gcc_assert (can_overrun_p
);
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1879 "Data access with gaps requires scalar "
1881 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
1887 /* A subroutine of get_load_store_type, with a subset of the same
1888 arguments. Handle the case where STMT is a load or store that
1889 accesses consecutive elements with a negative step. */
1891 static vect_memory_access_type
1892 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1893 vec_load_store_type vls_type
,
1894 unsigned int ncopies
)
1896 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1897 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1898 dr_alignment_support alignment_support_scheme
;
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1904 "multiple types with negative step.\n");
1905 return VMAT_ELEMENTWISE
;
1908 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1909 if (alignment_support_scheme
!= dr_aligned
1910 && alignment_support_scheme
!= dr_unaligned_supported
)
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1914 "negative step but alignment required.\n");
1915 return VMAT_ELEMENTWISE
;
1918 if (vls_type
== VLS_STORE_INVARIANT
)
1920 if (dump_enabled_p ())
1921 dump_printf_loc (MSG_NOTE
, vect_location
,
1922 "negative step with invariant source;"
1923 " no permute needed.\n");
1924 return VMAT_CONTIGUOUS_DOWN
;
1927 if (!perm_mask_for_reverse (vectype
))
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1931 "negative step and reversing not supported.\n");
1932 return VMAT_ELEMENTWISE
;
1935 return VMAT_CONTIGUOUS_REVERSE
;
1938 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1939 if there is a memory access type that the vectorized form can use,
1940 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1941 or scatters, fill in GS_INFO accordingly.
1943 SLP says whether we're performing SLP rather than loop vectorization.
1944 VECTYPE is the vector type that the vectorized statements will use.
1945 NCOPIES is the number of vector statements that will be needed. */
1948 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1949 vec_load_store_type vls_type
, unsigned int ncopies
,
1950 vect_memory_access_type
*memory_access_type
,
1951 gather_scatter_info
*gs_info
)
1953 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1954 vec_info
*vinfo
= stmt_info
->vinfo
;
1955 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1956 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1958 *memory_access_type
= VMAT_GATHER_SCATTER
;
1960 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1962 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1963 &gs_info
->offset_dt
,
1964 &gs_info
->offset_vectype
))
1966 if (dump_enabled_p ())
1967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1968 "%s index use not simple.\n",
1969 vls_type
== VLS_LOAD
? "gather" : "scatter");
1973 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1975 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1976 memory_access_type
))
1979 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1982 *memory_access_type
= VMAT_ELEMENTWISE
;
1986 int cmp
= compare_step_with_zero (stmt
);
1988 *memory_access_type
= get_negative_load_store_type
1989 (stmt
, vectype
, vls_type
, ncopies
);
1992 gcc_assert (vls_type
== VLS_LOAD
);
1993 *memory_access_type
= VMAT_INVARIANT
;
1996 *memory_access_type
= VMAT_CONTIGUOUS
;
1999 /* FIXME: At the moment the cost model seems to underestimate the
2000 cost of using elementwise accesses. This check preserves the
2001 traditional behavior until that can be fixed. */
2002 if (*memory_access_type
== VMAT_ELEMENTWISE
2003 && !STMT_VINFO_STRIDED_P (stmt_info
))
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2007 "not falling back to elementwise accesses\n");
2013 /* Function vectorizable_mask_load_store.
2015 Check if STMT performs a conditional load or store that can be vectorized.
2016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2018 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2021 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2022 gimple
**vec_stmt
, slp_tree slp_node
)
2024 tree vec_dest
= NULL
;
2025 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2026 stmt_vec_info prev_stmt_info
;
2027 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2028 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2029 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2030 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2031 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2032 tree rhs_vectype
= NULL_TREE
;
2037 tree dataref_ptr
= NULL_TREE
;
2039 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2043 gather_scatter_info gs_info
;
2044 vec_load_store_type vls_type
;
2047 enum vect_def_type dt
;
2049 if (slp_node
!= NULL
)
2052 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2053 gcc_assert (ncopies
>= 1);
2055 mask
= gimple_call_arg (stmt
, 2);
2057 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2060 /* FORNOW. This restriction should be relaxed. */
2061 if (nested_in_vect_loop
&& ncopies
> 1)
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2065 "multiple types in nested loop.");
2069 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2072 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2076 if (!STMT_VINFO_DATA_REF (stmt_info
))
2079 elem_type
= TREE_TYPE (vectype
);
2081 if (TREE_CODE (mask
) != SSA_NAME
)
2084 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2088 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2090 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2091 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2094 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2096 tree rhs
= gimple_call_arg (stmt
, 3);
2097 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2099 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2100 vls_type
= VLS_STORE_INVARIANT
;
2102 vls_type
= VLS_STORE
;
2105 vls_type
= VLS_LOAD
;
2107 vect_memory_access_type memory_access_type
;
2108 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2109 &memory_access_type
, &gs_info
))
2112 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2114 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2116 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2117 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2121 "masked gather with integer mask not supported.");
2125 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2129 "unsupported access type for masked %s.\n",
2130 vls_type
== VLS_LOAD
? "load" : "store");
2133 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2134 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2135 TYPE_MODE (mask_vectype
),
2136 vls_type
== VLS_LOAD
)
2138 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2141 if (!vec_stmt
) /* transformation not required. */
2143 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2144 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2145 if (vls_type
== VLS_LOAD
)
2146 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2149 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2150 dt
, NULL
, NULL
, NULL
);
2153 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2157 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2159 tree vec_oprnd0
= NULL_TREE
, op
;
2160 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2161 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2162 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2163 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2164 tree mask_perm_mask
= NULL_TREE
;
2165 edge pe
= loop_preheader_edge (loop
);
2168 enum { NARROW
, NONE
, WIDEN
} modifier
;
2169 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2171 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2172 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2173 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2174 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2175 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2176 scaletype
= TREE_VALUE (arglist
);
2177 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2178 && types_compatible_p (srctype
, masktype
));
2180 if (nunits
== gather_off_nunits
)
2182 else if (nunits
== gather_off_nunits
/ 2)
2186 auto_vec_perm_indices
sel (gather_off_nunits
);
2187 for (i
= 0; i
< gather_off_nunits
; ++i
)
2188 sel
.quick_push (i
| nunits
);
2190 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2192 else if (nunits
== gather_off_nunits
* 2)
2196 auto_vec_perm_indices
sel (nunits
);
2197 sel
.quick_grow (nunits
);
2198 for (i
= 0; i
< nunits
; ++i
)
2199 sel
[i
] = i
< gather_off_nunits
2200 ? i
: i
+ nunits
- gather_off_nunits
;
2202 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2204 for (i
= 0; i
< nunits
; ++i
)
2205 sel
[i
] = i
| gather_off_nunits
;
2206 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2211 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2213 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2214 if (!is_gimple_min_invariant (ptr
))
2216 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2217 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2218 gcc_assert (!new_bb
);
2221 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2223 prev_stmt_info
= NULL
;
2224 for (j
= 0; j
< ncopies
; ++j
)
2226 if (modifier
== WIDEN
&& (j
& 1))
2227 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2228 perm_mask
, stmt
, gsi
);
2231 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2234 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2236 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2239 == TYPE_VECTOR_SUBPARTS (idxtype
));
2240 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2241 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2243 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2244 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2248 if (mask_perm_mask
&& (j
& 1))
2249 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2250 mask_perm_mask
, stmt
, gsi
);
2254 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2257 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2258 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2262 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2264 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2265 == TYPE_VECTOR_SUBPARTS (masktype
));
2266 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2267 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2269 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2270 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2276 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2279 if (!useless_type_conversion_p (vectype
, rettype
))
2281 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2282 == TYPE_VECTOR_SUBPARTS (rettype
));
2283 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2284 gimple_call_set_lhs (new_stmt
, op
);
2285 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2286 var
= make_ssa_name (vec_dest
);
2287 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2288 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2292 var
= make_ssa_name (vec_dest
, new_stmt
);
2293 gimple_call_set_lhs (new_stmt
, var
);
2296 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2298 if (modifier
== NARROW
)
2305 var
= permute_vec_elements (prev_res
, var
,
2306 perm_mask
, stmt
, gsi
);
2307 new_stmt
= SSA_NAME_DEF_STMT (var
);
2310 if (prev_stmt_info
== NULL
)
2311 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2313 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2314 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2317 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2319 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2321 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2322 stmt_info
= vinfo_for_stmt (stmt
);
2324 tree lhs
= gimple_call_lhs (stmt
);
2325 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2326 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2327 set_vinfo_for_stmt (stmt
, NULL
);
2328 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2329 gsi_replace (gsi
, new_stmt
, true);
2332 else if (vls_type
!= VLS_LOAD
)
2334 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2335 prev_stmt_info
= NULL
;
2336 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2337 for (i
= 0; i
< ncopies
; i
++)
2339 unsigned align
, misalign
;
2343 tree rhs
= gimple_call_arg (stmt
, 3);
2344 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2345 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2347 /* We should have caught mismatched types earlier. */
2348 gcc_assert (useless_type_conversion_p (vectype
,
2349 TREE_TYPE (vec_rhs
)));
2350 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2351 NULL_TREE
, &dummy
, gsi
,
2352 &ptr_incr
, false, &inv_p
);
2353 gcc_assert (!inv_p
);
2357 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2358 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2359 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2360 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2361 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2362 TYPE_SIZE_UNIT (vectype
));
2365 align
= DR_TARGET_ALIGNMENT (dr
);
2366 if (aligned_access_p (dr
))
2368 else if (DR_MISALIGNMENT (dr
) == -1)
2370 align
= TYPE_ALIGN_UNIT (elem_type
);
2374 misalign
= DR_MISALIGNMENT (dr
);
2375 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2377 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2378 misalign
? least_bit_hwi (misalign
) : align
);
2380 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2381 ptr
, vec_mask
, vec_rhs
);
2382 gimple_call_set_nothrow (call
, true);
2384 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2386 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2388 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2389 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2394 tree vec_mask
= NULL_TREE
;
2395 prev_stmt_info
= NULL
;
2396 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2397 for (i
= 0; i
< ncopies
; i
++)
2399 unsigned align
, misalign
;
2403 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2405 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2406 NULL_TREE
, &dummy
, gsi
,
2407 &ptr_incr
, false, &inv_p
);
2408 gcc_assert (!inv_p
);
2412 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2413 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2414 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2415 TYPE_SIZE_UNIT (vectype
));
2418 align
= DR_TARGET_ALIGNMENT (dr
);
2419 if (aligned_access_p (dr
))
2421 else if (DR_MISALIGNMENT (dr
) == -1)
2423 align
= TYPE_ALIGN_UNIT (elem_type
);
2427 misalign
= DR_MISALIGNMENT (dr
);
2428 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2430 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2431 misalign
? least_bit_hwi (misalign
) : align
);
2433 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2435 gimple_call_set_lhs (call
, make_ssa_name (vec_dest
));
2436 gimple_call_set_nothrow (call
, true);
2437 vect_finish_stmt_generation (stmt
, call
, gsi
);
2439 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= call
;
2441 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = call
;
2442 prev_stmt_info
= vinfo_for_stmt (call
);
2446 if (vls_type
== VLS_LOAD
)
2448 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2450 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2452 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2453 stmt_info
= vinfo_for_stmt (stmt
);
2455 tree lhs
= gimple_call_lhs (stmt
);
2456 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2457 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2458 set_vinfo_for_stmt (stmt
, NULL
);
2459 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2460 gsi_replace (gsi
, new_stmt
, true);
2466 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2469 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2470 gimple
**vec_stmt
, slp_tree slp_node
,
2471 tree vectype_in
, enum vect_def_type
*dt
)
2474 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2475 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2476 unsigned ncopies
, nunits
;
2478 op
= gimple_call_arg (stmt
, 0);
2479 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2480 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2482 /* Multiple types in SLP are handled by creating the appropriate number of
2483 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2488 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2490 gcc_assert (ncopies
>= 1);
2492 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2496 unsigned int num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2497 unsigned word_bytes
= num_bytes
/ nunits
;
2499 auto_vec_perm_indices
elts (num_bytes
);
2500 for (unsigned i
= 0; i
< nunits
; ++i
)
2501 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2502 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2504 if (! can_vec_perm_p (TYPE_MODE (char_vectype
), false, &elts
))
2509 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2513 if (! PURE_SLP_STMT (stmt_info
))
2515 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2516 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2517 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2518 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2523 auto_vec
<tree
, 32> telts (num_bytes
);
2524 for (unsigned i
= 0; i
< num_bytes
; ++i
)
2525 telts
.quick_push (build_int_cst (char_type_node
, elts
[i
]));
2526 tree bswap_vconst
= build_vector (char_vectype
, telts
);
2529 vec
<tree
> vec_oprnds
= vNULL
;
2530 gimple
*new_stmt
= NULL
;
2531 stmt_vec_info prev_stmt_info
= NULL
;
2532 for (unsigned j
= 0; j
< ncopies
; j
++)
2536 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
2538 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2540 /* Arguments are ready. Create the new vector stmt. */
2543 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2545 tree tem
= make_ssa_name (char_vectype
);
2546 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2547 char_vectype
, vop
));
2548 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2549 tree tem2
= make_ssa_name (char_vectype
);
2550 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2551 tem
, tem
, bswap_vconst
);
2552 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2553 tem
= make_ssa_name (vectype
);
2554 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2556 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2558 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2565 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2567 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2569 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2572 vec_oprnds
.release ();
2576 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2577 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2578 in a single step. On success, store the binary pack code in
2582 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2583 tree_code
*convert_code
)
2585 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2586 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2590 int multi_step_cvt
= 0;
2591 auto_vec
<tree
, 8> interm_types
;
2592 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2593 &code
, &multi_step_cvt
,
2598 *convert_code
= code
;
2602 /* Function vectorizable_call.
2604 Check if GS performs a function call that can be vectorized.
2605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2606 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2607 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2610 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2617 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2618 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2619 tree vectype_out
, vectype_in
;
2622 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2623 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2624 vec_info
*vinfo
= stmt_info
->vinfo
;
2625 tree fndecl
, new_temp
, rhs_type
;
2627 enum vect_def_type dt
[3]
2628 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2630 gimple
*new_stmt
= NULL
;
2632 vec
<tree
> vargs
= vNULL
;
2633 enum { NARROW
, NONE
, WIDEN
} modifier
;
2637 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2640 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2644 /* Is GS a vectorizable call? */
2645 stmt
= dyn_cast
<gcall
*> (gs
);
2649 if (gimple_call_internal_p (stmt
)
2650 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2651 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2652 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2655 if (gimple_call_lhs (stmt
) == NULL_TREE
2656 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2659 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2661 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2663 /* Process function arguments. */
2664 rhs_type
= NULL_TREE
;
2665 vectype_in
= NULL_TREE
;
2666 nargs
= gimple_call_num_args (stmt
);
2668 /* Bail out if the function has more than three arguments; we do not have
2669 interesting builtin functions to vectorize with more than two arguments
2670 except for fma. Having no arguments is also not good. */
2671 if (nargs
== 0 || nargs
> 3)
2674 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2675 if (gimple_call_internal_p (stmt
)
2676 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2679 rhs_type
= unsigned_type_node
;
2682 for (i
= 0; i
< nargs
; i
++)
2686 op
= gimple_call_arg (stmt
, i
);
2688 /* We can only handle calls with arguments of the same type. */
2690 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2692 if (dump_enabled_p ())
2693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2694 "argument types differ.\n");
2698 rhs_type
= TREE_TYPE (op
);
2700 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2702 if (dump_enabled_p ())
2703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2704 "use not simple.\n");
2709 vectype_in
= opvectype
;
2711 && opvectype
!= vectype_in
)
2713 if (dump_enabled_p ())
2714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2715 "argument vector types differ.\n");
2719 /* If all arguments are external or constant defs use a vector type with
2720 the same size as the output vector type. */
2722 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2724 gcc_assert (vectype_in
);
2727 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2730 "no vectype for scalar type ");
2731 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2732 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2739 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2740 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2741 if (nunits_in
== nunits_out
/ 2)
2743 else if (nunits_out
== nunits_in
)
2745 else if (nunits_out
== nunits_in
/ 2)
2750 /* We only handle functions that do not read or clobber memory. */
2751 if (gimple_vuse (stmt
))
2753 if (dump_enabled_p ())
2754 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2755 "function reads from or writes to memory.\n");
2759 /* For now, we only vectorize functions if a target specific builtin
2760 is available. TODO -- in some cases, it might be profitable to
2761 insert the calls for pieces of the vector, in order to be able
2762 to vectorize other operations in the loop. */
2764 internal_fn ifn
= IFN_LAST
;
2765 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2766 tree callee
= gimple_call_fndecl (stmt
);
2768 /* First try using an internal function. */
2769 tree_code convert_code
= ERROR_MARK
;
2771 && (modifier
== NONE
2772 || (modifier
== NARROW
2773 && simple_integer_narrowing (vectype_out
, vectype_in
,
2775 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2778 /* If that fails, try asking for a target-specific built-in function. */
2779 if (ifn
== IFN_LAST
)
2781 if (cfn
!= CFN_LAST
)
2782 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2783 (cfn
, vectype_out
, vectype_in
);
2785 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2786 (callee
, vectype_out
, vectype_in
);
2789 if (ifn
== IFN_LAST
&& !fndecl
)
2791 if (cfn
== CFN_GOMP_SIMD_LANE
2794 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2795 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2796 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2797 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2799 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2800 { 0, 1, 2, ... vf - 1 } vector. */
2801 gcc_assert (nargs
== 0);
2803 else if (modifier
== NONE
2804 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2805 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2806 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2807 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2813 "function is not vectorizable.\n");
2820 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2821 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
2823 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
2825 /* Sanity check: make sure that at least one copy of the vectorized stmt
2826 needs to be generated. */
2827 gcc_assert (ncopies
>= 1);
2829 if (!vec_stmt
) /* transformation not required. */
2831 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2832 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2835 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2836 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2837 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2838 vec_promote_demote
, stmt_info
, 0, vect_body
);
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2849 scalar_dest
= gimple_call_lhs (stmt
);
2850 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2852 prev_stmt_info
= NULL
;
2853 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2855 tree prev_res
= NULL_TREE
;
2856 for (j
= 0; j
< ncopies
; ++j
)
2858 /* Build argument list for the vectorized call. */
2860 vargs
.create (nargs
);
2866 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2867 vec
<tree
> vec_oprnds0
;
2869 for (i
= 0; i
< nargs
; i
++)
2870 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2871 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
2872 vec_oprnds0
= vec_defs
[0];
2874 /* Arguments are ready. Create the new vector stmt. */
2875 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2878 for (k
= 0; k
< nargs
; k
++)
2880 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2881 vargs
[k
] = vec_oprndsk
[i
];
2883 if (modifier
== NARROW
)
2885 tree half_res
= make_ssa_name (vectype_in
);
2887 = gimple_build_call_internal_vec (ifn
, vargs
);
2888 gimple_call_set_lhs (call
, half_res
);
2889 gimple_call_set_nothrow (call
, true);
2891 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2894 prev_res
= half_res
;
2897 new_temp
= make_ssa_name (vec_dest
);
2898 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2899 prev_res
, half_res
);
2904 if (ifn
!= IFN_LAST
)
2905 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2907 call
= gimple_build_call_vec (fndecl
, vargs
);
2908 new_temp
= make_ssa_name (vec_dest
, call
);
2909 gimple_call_set_lhs (call
, new_temp
);
2910 gimple_call_set_nothrow (call
, true);
2913 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2914 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2917 for (i
= 0; i
< nargs
; i
++)
2919 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2920 vec_oprndsi
.release ();
2925 for (i
= 0; i
< nargs
; i
++)
2927 op
= gimple_call_arg (stmt
, i
);
2930 = vect_get_vec_def_for_operand (op
, stmt
);
2933 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2935 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2938 vargs
.quick_push (vec_oprnd0
);
2941 if (gimple_call_internal_p (stmt
)
2942 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2944 auto_vec
<tree
, 32> v (nunits_out
);
2945 for (int k
= 0; k
< nunits_out
; ++k
)
2946 v
.quick_push (build_int_cst (unsigned_type_node
,
2947 j
* nunits_out
+ k
));
2948 tree cst
= build_vector (vectype_out
, v
);
2950 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2951 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2952 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2953 new_temp
= make_ssa_name (vec_dest
);
2954 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2956 else if (modifier
== NARROW
)
2958 tree half_res
= make_ssa_name (vectype_in
);
2959 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
2960 gimple_call_set_lhs (call
, half_res
);
2961 gimple_call_set_nothrow (call
, true);
2963 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2966 prev_res
= half_res
;
2969 new_temp
= make_ssa_name (vec_dest
);
2970 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2971 prev_res
, half_res
);
2976 if (ifn
!= IFN_LAST
)
2977 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2979 call
= gimple_build_call_vec (fndecl
, vargs
);
2980 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2981 gimple_call_set_lhs (call
, new_temp
);
2982 gimple_call_set_nothrow (call
, true);
2985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2987 if (j
== (modifier
== NARROW
? 1 : 0))
2988 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2990 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2992 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2995 else if (modifier
== NARROW
)
2997 for (j
= 0; j
< ncopies
; ++j
)
2999 /* Build argument list for the vectorized call. */
3001 vargs
.create (nargs
* 2);
3007 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3008 vec
<tree
> vec_oprnds0
;
3010 for (i
= 0; i
< nargs
; i
++)
3011 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3012 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3013 vec_oprnds0
= vec_defs
[0];
3015 /* Arguments are ready. Create the new vector stmt. */
3016 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3020 for (k
= 0; k
< nargs
; k
++)
3022 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3023 vargs
.quick_push (vec_oprndsk
[i
]);
3024 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3027 if (ifn
!= IFN_LAST
)
3028 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3030 call
= gimple_build_call_vec (fndecl
, vargs
);
3031 new_temp
= make_ssa_name (vec_dest
, call
);
3032 gimple_call_set_lhs (call
, new_temp
);
3033 gimple_call_set_nothrow (call
, true);
3035 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3036 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3039 for (i
= 0; i
< nargs
; i
++)
3041 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3042 vec_oprndsi
.release ();
3047 for (i
= 0; i
< nargs
; i
++)
3049 op
= gimple_call_arg (stmt
, i
);
3053 = vect_get_vec_def_for_operand (op
, stmt
);
3055 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3059 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3061 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3063 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3066 vargs
.quick_push (vec_oprnd0
);
3067 vargs
.quick_push (vec_oprnd1
);
3070 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3071 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3072 gimple_call_set_lhs (new_stmt
, new_temp
);
3073 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3076 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3078 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3080 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3083 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3086 /* No current target implements this case. */
3091 /* The call in STMT might prevent it from being removed in dce.
3092 We however cannot remove it here, due to the way the ssa name
3093 it defines is mapped to the new definition. So just replace
3094 rhs of the statement with something harmless. */
3099 type
= TREE_TYPE (scalar_dest
);
3100 if (is_pattern_stmt_p (stmt_info
))
3101 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3103 lhs
= gimple_call_lhs (stmt
);
3105 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3106 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3107 set_vinfo_for_stmt (stmt
, NULL
);
3108 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3109 gsi_replace (gsi
, new_stmt
, false);
3115 struct simd_call_arg_info
3119 HOST_WIDE_INT linear_step
;
3120 enum vect_def_type dt
;
3122 bool simd_lane_linear
;
3125 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3126 is linear within simd lane (but not within whole loop), note it in
3130 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3131 struct simd_call_arg_info
*arginfo
)
3133 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3135 if (!is_gimple_assign (def_stmt
)
3136 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3137 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3140 tree base
= gimple_assign_rhs1 (def_stmt
);
3141 HOST_WIDE_INT linear_step
= 0;
3142 tree v
= gimple_assign_rhs2 (def_stmt
);
3143 while (TREE_CODE (v
) == SSA_NAME
)
3146 def_stmt
= SSA_NAME_DEF_STMT (v
);
3147 if (is_gimple_assign (def_stmt
))
3148 switch (gimple_assign_rhs_code (def_stmt
))
3151 t
= gimple_assign_rhs2 (def_stmt
);
3152 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3154 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3155 v
= gimple_assign_rhs1 (def_stmt
);
3158 t
= gimple_assign_rhs2 (def_stmt
);
3159 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3161 linear_step
= tree_to_shwi (t
);
3162 v
= gimple_assign_rhs1 (def_stmt
);
3165 t
= gimple_assign_rhs1 (def_stmt
);
3166 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3167 || (TYPE_PRECISION (TREE_TYPE (v
))
3168 < TYPE_PRECISION (TREE_TYPE (t
))))
3177 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3179 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3180 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3185 arginfo
->linear_step
= linear_step
;
3187 arginfo
->simd_lane_linear
= true;
3193 /* Function vectorizable_simd_clone_call.
3195 Check if STMT performs a function call that can be vectorized
3196 by calling a simd clone of the function.
3197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3202 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3203 gimple
**vec_stmt
, slp_tree slp_node
)
3208 tree vec_oprnd0
= NULL_TREE
;
3209 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3211 unsigned int nunits
;
3212 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3213 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3214 vec_info
*vinfo
= stmt_info
->vinfo
;
3215 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3216 tree fndecl
, new_temp
;
3218 gimple
*new_stmt
= NULL
;
3220 auto_vec
<simd_call_arg_info
> arginfo
;
3221 vec
<tree
> vargs
= vNULL
;
3223 tree lhs
, rtype
, ratype
;
3224 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3226 /* Is STMT a vectorizable call? */
3227 if (!is_gimple_call (stmt
))
3230 fndecl
= gimple_call_fndecl (stmt
);
3231 if (fndecl
== NULL_TREE
)
3234 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3235 if (node
== NULL
|| node
->simd_clones
== NULL
)
3238 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3241 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3245 if (gimple_call_lhs (stmt
)
3246 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3249 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3251 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3253 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3260 /* Process function arguments. */
3261 nargs
= gimple_call_num_args (stmt
);
3263 /* Bail out if the function has zero arguments. */
3267 arginfo
.reserve (nargs
, true);
3269 for (i
= 0; i
< nargs
; i
++)
3271 simd_call_arg_info thisarginfo
;
3274 thisarginfo
.linear_step
= 0;
3275 thisarginfo
.align
= 0;
3276 thisarginfo
.op
= NULL_TREE
;
3277 thisarginfo
.simd_lane_linear
= false;
3279 op
= gimple_call_arg (stmt
, i
);
3280 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3281 &thisarginfo
.vectype
)
3282 || thisarginfo
.dt
== vect_uninitialized_def
)
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3286 "use not simple.\n");
3290 if (thisarginfo
.dt
== vect_constant_def
3291 || thisarginfo
.dt
== vect_external_def
)
3292 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3294 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3296 /* For linear arguments, the analyze phase should have saved
3297 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3298 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3299 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3301 gcc_assert (vec_stmt
);
3302 thisarginfo
.linear_step
3303 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3305 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3306 thisarginfo
.simd_lane_linear
3307 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3308 == boolean_true_node
);
3309 /* If loop has been peeled for alignment, we need to adjust it. */
3310 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3311 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3312 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3314 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3315 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3316 tree opt
= TREE_TYPE (thisarginfo
.op
);
3317 bias
= fold_convert (TREE_TYPE (step
), bias
);
3318 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3320 = fold_build2 (POINTER_TYPE_P (opt
)
3321 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3322 thisarginfo
.op
, bias
);
3326 && thisarginfo
.dt
!= vect_constant_def
3327 && thisarginfo
.dt
!= vect_external_def
3329 && TREE_CODE (op
) == SSA_NAME
3330 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3332 && tree_fits_shwi_p (iv
.step
))
3334 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3335 thisarginfo
.op
= iv
.base
;
3337 else if ((thisarginfo
.dt
== vect_constant_def
3338 || thisarginfo
.dt
== vect_external_def
)
3339 && POINTER_TYPE_P (TREE_TYPE (op
)))
3340 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3341 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3343 if (POINTER_TYPE_P (TREE_TYPE (op
))
3344 && !thisarginfo
.linear_step
3346 && thisarginfo
.dt
!= vect_constant_def
3347 && thisarginfo
.dt
!= vect_external_def
3350 && TREE_CODE (op
) == SSA_NAME
)
3351 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3353 arginfo
.quick_push (thisarginfo
);
3356 unsigned int badness
= 0;
3357 struct cgraph_node
*bestn
= NULL
;
3358 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3359 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3361 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3362 n
= n
->simdclone
->next_clone
)
3364 unsigned int this_badness
= 0;
3365 if (n
->simdclone
->simdlen
3366 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3367 || n
->simdclone
->nargs
!= nargs
)
3369 if (n
->simdclone
->simdlen
3370 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3371 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3372 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3373 if (n
->simdclone
->inbranch
)
3374 this_badness
+= 2048;
3375 int target_badness
= targetm
.simd_clone
.usable (n
);
3376 if (target_badness
< 0)
3378 this_badness
+= target_badness
* 512;
3379 /* FORNOW: Have to add code to add the mask argument. */
3380 if (n
->simdclone
->inbranch
)
3382 for (i
= 0; i
< nargs
; i
++)
3384 switch (n
->simdclone
->args
[i
].arg_type
)
3386 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3387 if (!useless_type_conversion_p
3388 (n
->simdclone
->args
[i
].orig_type
,
3389 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3391 else if (arginfo
[i
].dt
== vect_constant_def
3392 || arginfo
[i
].dt
== vect_external_def
3393 || arginfo
[i
].linear_step
)
3396 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3397 if (arginfo
[i
].dt
!= vect_constant_def
3398 && arginfo
[i
].dt
!= vect_external_def
)
3401 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3403 if (arginfo
[i
].dt
== vect_constant_def
3404 || arginfo
[i
].dt
== vect_external_def
3405 || (arginfo
[i
].linear_step
3406 != n
->simdclone
->args
[i
].linear_step
))
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3418 case SIMD_CLONE_ARG_TYPE_MASK
:
3421 if (i
== (size_t) -1)
3423 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3428 if (arginfo
[i
].align
)
3429 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3430 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3432 if (i
== (size_t) -1)
3434 if (bestn
== NULL
|| this_badness
< badness
)
3437 badness
= this_badness
;
3444 for (i
= 0; i
< nargs
; i
++)
3445 if ((arginfo
[i
].dt
== vect_constant_def
3446 || arginfo
[i
].dt
== vect_external_def
)
3447 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3450 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3452 if (arginfo
[i
].vectype
== NULL
3453 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3454 > bestn
->simdclone
->simdlen
))
3458 fndecl
= bestn
->decl
;
3459 nunits
= bestn
->simdclone
->simdlen
;
3460 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3462 /* If the function isn't const, only allow it in simd loops where user
3463 has asserted that at least nunits consecutive iterations can be
3464 performed using SIMD instructions. */
3465 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3466 && gimple_vuse (stmt
))
3469 /* Sanity check: make sure that at least one copy of the vectorized stmt
3470 needs to be generated. */
3471 gcc_assert (ncopies
>= 1);
3473 if (!vec_stmt
) /* transformation not required. */
3475 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3476 for (i
= 0; i
< nargs
; i
++)
3477 if ((bestn
->simdclone
->args
[i
].arg_type
3478 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3479 || (bestn
->simdclone
->args
[i
].arg_type
3480 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3482 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3484 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3485 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3486 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3487 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3488 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3489 tree sll
= arginfo
[i
].simd_lane_linear
3490 ? boolean_true_node
: boolean_false_node
;
3491 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3493 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3494 if (dump_enabled_p ())
3495 dump_printf_loc (MSG_NOTE
, vect_location
,
3496 "=== vectorizable_simd_clone_call ===\n");
3497 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3503 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3507 scalar_dest
= gimple_call_lhs (stmt
);
3508 vec_dest
= NULL_TREE
;
3513 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3514 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3515 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3518 rtype
= TREE_TYPE (ratype
);
3522 prev_stmt_info
= NULL
;
3523 for (j
= 0; j
< ncopies
; ++j
)
3525 /* Build argument list for the vectorized call. */
3527 vargs
.create (nargs
);
3531 for (i
= 0; i
< nargs
; i
++)
3533 unsigned int k
, l
, m
, o
;
3535 op
= gimple_call_arg (stmt
, i
);
3536 switch (bestn
->simdclone
->args
[i
].arg_type
)
3538 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3539 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3540 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3541 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3543 if (TYPE_VECTOR_SUBPARTS (atype
)
3544 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3546 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3547 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3548 / TYPE_VECTOR_SUBPARTS (atype
));
3549 gcc_assert ((k
& (k
- 1)) == 0);
3552 = vect_get_vec_def_for_operand (op
, stmt
);
3555 vec_oprnd0
= arginfo
[i
].op
;
3556 if ((m
& (k
- 1)) == 0)
3558 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3561 arginfo
[i
].op
= vec_oprnd0
;
3563 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3565 bitsize_int ((m
& (k
- 1)) * prec
));
3567 = gimple_build_assign (make_ssa_name (atype
),
3569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3570 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3574 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3575 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3576 gcc_assert ((k
& (k
- 1)) == 0);
3577 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3579 vec_alloc (ctor_elts
, k
);
3582 for (l
= 0; l
< k
; l
++)
3584 if (m
== 0 && l
== 0)
3586 = vect_get_vec_def_for_operand (op
, stmt
);
3589 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3591 arginfo
[i
].op
= vec_oprnd0
;
3594 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3598 vargs
.safe_push (vec_oprnd0
);
3601 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3603 = gimple_build_assign (make_ssa_name (atype
),
3605 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3606 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3611 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3612 vargs
.safe_push (op
);
3614 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3615 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3620 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3625 edge pe
= loop_preheader_edge (loop
);
3626 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3627 gcc_assert (!new_bb
);
3629 if (arginfo
[i
].simd_lane_linear
)
3631 vargs
.safe_push (arginfo
[i
].op
);
3634 tree phi_res
= copy_ssa_name (op
);
3635 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3636 set_vinfo_for_stmt (new_phi
,
3637 new_stmt_vec_info (new_phi
, loop_vinfo
));
3638 add_phi_arg (new_phi
, arginfo
[i
].op
,
3639 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3641 = POINTER_TYPE_P (TREE_TYPE (op
))
3642 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3643 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3644 ? sizetype
: TREE_TYPE (op
);
3646 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3648 tree tcst
= wide_int_to_tree (type
, cst
);
3649 tree phi_arg
= copy_ssa_name (op
);
3651 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3652 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3653 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3654 set_vinfo_for_stmt (new_stmt
,
3655 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3656 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3658 arginfo
[i
].op
= phi_res
;
3659 vargs
.safe_push (phi_res
);
3664 = POINTER_TYPE_P (TREE_TYPE (op
))
3665 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3666 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3667 ? sizetype
: TREE_TYPE (op
);
3669 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3671 tree tcst
= wide_int_to_tree (type
, cst
);
3672 new_temp
= make_ssa_name (TREE_TYPE (op
));
3673 new_stmt
= gimple_build_assign (new_temp
, code
,
3674 arginfo
[i
].op
, tcst
);
3675 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3676 vargs
.safe_push (new_temp
);
3679 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3680 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3681 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3682 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3683 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3684 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3690 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3693 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3695 new_temp
= create_tmp_var (ratype
);
3696 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3697 == TYPE_VECTOR_SUBPARTS (rtype
))
3698 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3700 new_temp
= make_ssa_name (rtype
, new_stmt
);
3701 gimple_call_set_lhs (new_stmt
, new_temp
);
3703 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3707 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3710 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3711 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3712 gcc_assert ((k
& (k
- 1)) == 0);
3713 for (l
= 0; l
< k
; l
++)
3718 t
= build_fold_addr_expr (new_temp
);
3719 t
= build2 (MEM_REF
, vectype
, t
,
3720 build_int_cst (TREE_TYPE (t
),
3721 l
* prec
/ BITS_PER_UNIT
));
3724 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3725 bitsize_int (prec
), bitsize_int (l
* prec
));
3727 = gimple_build_assign (make_ssa_name (vectype
), t
);
3728 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3729 if (j
== 0 && l
== 0)
3730 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3732 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3734 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3739 tree clobber
= build_constructor (ratype
, NULL
);
3740 TREE_THIS_VOLATILE (clobber
) = 1;
3741 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3742 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3746 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3748 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3749 / TYPE_VECTOR_SUBPARTS (rtype
));
3750 gcc_assert ((k
& (k
- 1)) == 0);
3751 if ((j
& (k
- 1)) == 0)
3752 vec_alloc (ret_ctor_elts
, k
);
3755 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3756 for (m
= 0; m
< o
; m
++)
3758 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3759 size_int (m
), NULL_TREE
, NULL_TREE
);
3761 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3762 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3763 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3764 gimple_assign_lhs (new_stmt
));
3766 tree clobber
= build_constructor (ratype
, NULL
);
3767 TREE_THIS_VOLATILE (clobber
) = 1;
3768 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3769 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3772 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3773 if ((j
& (k
- 1)) != k
- 1)
3775 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3777 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3778 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3780 if ((unsigned) j
== k
- 1)
3781 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3783 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3785 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3790 tree t
= build_fold_addr_expr (new_temp
);
3791 t
= build2 (MEM_REF
, vectype
, t
,
3792 build_int_cst (TREE_TYPE (t
), 0));
3794 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3795 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3796 tree clobber
= build_constructor (ratype
, NULL
);
3797 TREE_THIS_VOLATILE (clobber
) = 1;
3798 vect_finish_stmt_generation (stmt
,
3799 gimple_build_assign (new_temp
,
3805 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3809 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3814 /* The call in STMT might prevent it from being removed in dce.
3815 We however cannot remove it here, due to the way the ssa name
3816 it defines is mapped to the new definition. So just replace
3817 rhs of the statement with something harmless. */
3824 type
= TREE_TYPE (scalar_dest
);
3825 if (is_pattern_stmt_p (stmt_info
))
3826 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3828 lhs
= gimple_call_lhs (stmt
);
3829 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3832 new_stmt
= gimple_build_nop ();
3833 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3834 set_vinfo_for_stmt (stmt
, NULL
);
3835 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3836 gsi_replace (gsi
, new_stmt
, true);
3837 unlink_stmt_vdef (stmt
);
3843 /* Function vect_gen_widened_results_half
3845 Create a vector stmt whose code, type, number of arguments, and result
3846 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3847 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3848 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3849 needs to be created (DECL is a function-decl of a target-builtin).
3850 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): this chunk is a lossy extraction.  Several original source
   lines are missing here (e.g. the return type / DECL and STMT parameters
   of the signature, the local declarations of NEW_STMT and NEW_TEMP, the
   `else` keywords, braces, and the trailing `return new_stmt;`).  Confirm
   every change against the upstream file before editing.  */
3853 vect_gen_widened_results_half (enum tree_code code
,
3855 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3856 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3862 /* Generate half of the widened result: */
/* Two emission strategies: a target-builtin call when CODE is CALL_EXPR,
   otherwise a plain gimple assignment with CODE as the rhs code.  */
3863 if (code
== CALL_EXPR
)
3865 /* Target specific support */
/* Build the call to DECL with either both operands (binary) or just
   VEC_OPRND0 (unary), then give it a fresh SSA lhs based on VEC_DEST.  */
3866 if (op_type
== binary_op
)
3867 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3869 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3870 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3871 gimple_call_set_lhs (new_stmt
, new_temp
);
3875 /* Generic support */
/* Non-call path: OP_TYPE must match the arity of CODE.  (The original
   also clears VEC_OPRND1 for unary ops on a line dropped by the
   extraction -- TODO confirm upstream.)  */
3876 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3877 if (op_type
!= binary_op
)
3879 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3880 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3881 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Insert the freshly built statement at GSI.  */
3883 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3889 /* Get vectorized definitions for loop-based vectorization. For the first
3890 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3891 scalar operand), and for the rest we get a copy with
3892 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3893 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3894 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): lossy extraction -- the local declaration of VEC_OPRND,
   the update of *OPRND between pushes, braces, and the tail of several
   comments (e.g. the continuation of the comment at 3903 and 3918) are
   missing.  Verify against the upstream file.  */
3897 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3898 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3902 /* Get first vector operand. */
3903 /* All the vector operands except the very first one (that is scalar oprnd)
3905 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
/* *OPRND is still the scalar operand: create its initial vector def.  */
3906 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
/* Otherwise *OPRND already holds the previous vector def; chain a copy.  */
3908 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3910 vec_oprnds
->quick_push (vec_oprnd
);
3912 /* Get second vector operand. */
/* Defs are produced in pairs; the second is chained off the first.  */
3913 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3914 vec_oprnds
->quick_push (vec_oprnd
);
3918 /* For conversion in multiple steps, continue to get operands
/* Recurse, decrementing MULTI_STEP_CVT; presumably guarded by a
   `if (multi_step_cvt)` dropped by the extraction -- TODO confirm.  */
3921 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3925 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3926 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): lossy extraction -- declarations of I/NEW_STMT, the
   `if (multi_step_cvt)` / `else` framing around the store-vs-recurse
   branches, braces, and the VEC_DSTS parameter line of the signature are
   missing.  Verify against the upstream file.  */
3930 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3931 int multi_step_cvt
, gimple
*stmt
,
3933 gimple_stmt_iterator
*gsi
,
3934 slp_tree slp_node
, enum tree_code code
,
3935 stmt_vec_info
*prev_stmt_info
)
3938 tree vop0
, vop1
, new_tmp
, vec_dest
;
3940 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Destinations were pushed innermost-last; take the one for this level.  */
3942 vec_dest
= vec_dsts
.pop ();
/* Each demotion consumes a pair of source vectors and produces one.  */
3944 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3946 /* Create demotion operation. */
3947 vop0
= (*vec_oprnds
)[i
];
3948 vop1
= (*vec_oprnds
)[i
+ 1];
3949 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3950 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3951 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3952 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3955 /* Store the resulting vector for next recursive call. */
/* Compact the pair's result into slot i/2 so the recursion sees a
   half-length operand vector.  */
3956 (*vec_oprnds
)[i
/2] = new_tmp
;
3959 /* This is the last step of the conversion sequence. Store the
3960 vectors in SLP_NODE or in vector info of the scalar statement
3961 (or in STMT_VINFO_RELATED_STMT chain). */
3963 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Loop-based (non-SLP) recording: first stmt goes in STMT_VINFO_VEC_STMT,
   later copies are chained through STMT_VINFO_RELATED_STMT.  */
3966 if (!*prev_stmt_info
)
3967 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3969 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3971 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3976 /* For multi-step demotion operations we first generate demotion operations
3977 from the source type to the intermediate types, and then combine the
3978 results (stored in VEC_OPRNDS) in demotion operation to the destination
3982 /* At each level of recursion we have half of the operands we had at the
3984 vec_oprnds
->truncate ((i
+1)/2);
/* Recurse one level down, always packing with VEC_PACK_TRUNC_EXPR.  */
3985 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3986 stmt
, vec_dsts
, gsi
, slp_node
,
3987 VEC_PACK_TRUNC_EXPR
,
/* Restore this level's destination for the caller's next iteration.  */
3991 vec_dsts
.quick_push (vec_dest
);
3995 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3996 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3997 the resulting vectors and call the function recursively. */
/* NOTE(review): lossy extraction -- the return type, the declaration of I,
   the `else` branches, braces, and the initialization of VOP1 for the
   unary case are missing.  Verify against the upstream file.  */
4000 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4001 vec
<tree
> *vec_oprnds1
,
4002 gimple
*stmt
, tree vec_dest
,
4003 gimple_stmt_iterator
*gsi
,
4004 enum tree_code code1
,
4005 enum tree_code code2
, tree decl1
,
4006 tree decl2
, int op_type
)
4009 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4010 gimple
*new_stmt1
, *new_stmt2
;
4011 vec
<tree
> vec_tmp
= vNULL
;
/* Each input vector yields two widened halves, hence length * 2.  */
4013 vec_tmp
.create (vec_oprnds0
->length () * 2);
4014 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
/* Binary ops take the matching second operand; the unary-case VOP1
   assignment was dropped by the extraction -- TODO confirm upstream.  */
4016 if (op_type
== binary_op
)
4017 vop1
= (*vec_oprnds1
)[i
];
4021 /* Generate the two halves of promotion operation. */
/* CODE1/DECL1 produce the low half, CODE2/DECL2 the high half; see
   vect_gen_widened_results_half for the call-vs-assign distinction.  */
4022 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4023 op_type
, vec_dest
, gsi
, stmt
);
4024 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4025 op_type
, vec_dest
, gsi
, stmt
);
/* Fetch the result lhs through the accessor matching the stmt kind.  */
4026 if (is_gimple_call (new_stmt1
))
4028 new_tmp1
= gimple_call_lhs (new_stmt1
);
4029 new_tmp2
= gimple_call_lhs (new_stmt2
);
4033 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4034 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4037 /* Store the results for the next step. */
4038 vec_tmp
.quick_push (new_tmp1
);
4039 vec_tmp
.quick_push (new_tmp2
);
/* Replace the caller's operand vector with the doubled-length results.  */
4042 vec_oprnds0
->release ();
4043 *vec_oprnds0
= vec_tmp
;
4047 /* Check if STMT performs a conversion operation, that can be vectorized.
4048 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4049 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4050 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4053 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4054 gimple
**vec_stmt
, slp_tree slp_node
)
4058 tree op0
, op1
= NULL_TREE
;
4059 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4060 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4061 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4062 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4063 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4064 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4067 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4069 gimple
*new_stmt
= NULL
;
4070 stmt_vec_info prev_stmt_info
;
4073 tree vectype_out
, vectype_in
;
4075 tree lhs_type
, rhs_type
;
4076 enum { NARROW
, NONE
, WIDEN
} modifier
;
4077 vec
<tree
> vec_oprnds0
= vNULL
;
4078 vec
<tree
> vec_oprnds1
= vNULL
;
4080 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4081 vec_info
*vinfo
= stmt_info
->vinfo
;
4082 int multi_step_cvt
= 0;
4083 vec
<tree
> interm_types
= vNULL
;
4084 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4086 unsigned short fltsz
;
4088 /* Is STMT a vectorizable conversion? */
4090 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4093 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4097 if (!is_gimple_assign (stmt
))
4100 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4103 code
= gimple_assign_rhs_code (stmt
);
4104 if (!CONVERT_EXPR_CODE_P (code
)
4105 && code
!= FIX_TRUNC_EXPR
4106 && code
!= FLOAT_EXPR
4107 && code
!= WIDEN_MULT_EXPR
4108 && code
!= WIDEN_LSHIFT_EXPR
)
4111 op_type
= TREE_CODE_LENGTH (code
);
4113 /* Check types of lhs and rhs. */
4114 scalar_dest
= gimple_assign_lhs (stmt
);
4115 lhs_type
= TREE_TYPE (scalar_dest
);
4116 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4118 op0
= gimple_assign_rhs1 (stmt
);
4119 rhs_type
= TREE_TYPE (op0
);
4121 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4122 && !((INTEGRAL_TYPE_P (lhs_type
)
4123 && INTEGRAL_TYPE_P (rhs_type
))
4124 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4125 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4128 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4129 && ((INTEGRAL_TYPE_P (lhs_type
)
4130 && !type_has_mode_precision_p (lhs_type
))
4131 || (INTEGRAL_TYPE_P (rhs_type
)
4132 && !type_has_mode_precision_p (rhs_type
))))
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4136 "type conversion to/from bit-precision unsupported."
4141 /* Check the operands of the operation. */
4142 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4144 if (dump_enabled_p ())
4145 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4146 "use not simple.\n");
4149 if (op_type
== binary_op
)
4153 op1
= gimple_assign_rhs2 (stmt
);
4154 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4155 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4157 if (CONSTANT_CLASS_P (op0
))
4158 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4160 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4164 if (dump_enabled_p ())
4165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4166 "use not simple.\n");
4171 /* If op0 is an external or constant defs use a vector type of
4172 the same size as the output vector type. */
4174 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4176 gcc_assert (vectype_in
);
4179 if (dump_enabled_p ())
4181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4182 "no vectype for scalar type ");
4183 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4184 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4190 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4191 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4193 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4196 "can't convert between boolean and non "
4198 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4199 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4205 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4206 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4207 if (nunits_in
< nunits_out
)
4209 else if (nunits_out
== nunits_in
)
4214 /* Multiple types in SLP are handled by creating the appropriate number of
4215 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4219 else if (modifier
== NARROW
)
4220 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4222 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4224 /* Sanity check: make sure that at least one copy of the vectorized stmt
4225 needs to be generated. */
4226 gcc_assert (ncopies
>= 1);
4228 bool found_mode
= false;
4229 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4230 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4231 opt_scalar_mode rhs_mode_iter
;
4233 /* Supportable by target? */
4237 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4239 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4244 if (dump_enabled_p ())
4245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4246 "conversion not supported by target.\n");
4250 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4251 &code1
, &code2
, &multi_step_cvt
,
4254 /* Binary widening operation can only be supported directly by the
4256 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4260 if (code
!= FLOAT_EXPR
4261 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4264 fltsz
= GET_MODE_SIZE (lhs_mode
);
4265 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4267 rhs_mode
= rhs_mode_iter
.require ();
4268 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4272 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4273 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4274 if (cvt_type
== NULL_TREE
)
4277 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4279 if (!supportable_convert_operation (code
, vectype_out
,
4280 cvt_type
, &decl1
, &codecvt1
))
4283 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4284 cvt_type
, &codecvt1
,
4285 &codecvt2
, &multi_step_cvt
,
4289 gcc_assert (multi_step_cvt
== 0);
4291 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4292 vectype_in
, &code1
, &code2
,
4293 &multi_step_cvt
, &interm_types
))
4303 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4304 codecvt2
= ERROR_MARK
;
4308 interm_types
.safe_push (cvt_type
);
4309 cvt_type
= NULL_TREE
;
4314 gcc_assert (op_type
== unary_op
);
4315 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4316 &code1
, &multi_step_cvt
,
4320 if (code
!= FIX_TRUNC_EXPR
4321 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4325 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4326 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4327 if (cvt_type
== NULL_TREE
)
4329 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4332 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4333 &code1
, &multi_step_cvt
,
4342 if (!vec_stmt
) /* transformation not required. */
4344 if (dump_enabled_p ())
4345 dump_printf_loc (MSG_NOTE
, vect_location
,
4346 "=== vectorizable_conversion ===\n");
4347 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4349 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4350 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4352 else if (modifier
== NARROW
)
4354 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4355 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4359 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4360 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4362 interm_types
.release ();
4367 if (dump_enabled_p ())
4368 dump_printf_loc (MSG_NOTE
, vect_location
,
4369 "transform conversion. ncopies = %d.\n", ncopies
);
4371 if (op_type
== binary_op
)
4373 if (CONSTANT_CLASS_P (op0
))
4374 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4375 else if (CONSTANT_CLASS_P (op1
))
4376 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4379 /* In case of multi-step conversion, we first generate conversion operations
4380 to the intermediate types, and then from that types to the final one.
4381 We create vector destinations for the intermediate type (TYPES) received
4382 from supportable_*_operation, and store them in the correct order
4383 for future use in vect_create_vectorized_*_stmts (). */
4384 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4385 vec_dest
= vect_create_destination_var (scalar_dest
,
4386 (cvt_type
&& modifier
== WIDEN
)
4387 ? cvt_type
: vectype_out
);
4388 vec_dsts
.quick_push (vec_dest
);
4392 for (i
= interm_types
.length () - 1;
4393 interm_types
.iterate (i
, &intermediate_type
); i
--)
4395 vec_dest
= vect_create_destination_var (scalar_dest
,
4397 vec_dsts
.quick_push (vec_dest
);
4402 vec_dest
= vect_create_destination_var (scalar_dest
,
4404 ? vectype_out
: cvt_type
);
4408 if (modifier
== WIDEN
)
4410 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4411 if (op_type
== binary_op
)
4412 vec_oprnds1
.create (1);
4414 else if (modifier
== NARROW
)
4415 vec_oprnds0
.create (
4416 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4418 else if (code
== WIDEN_LSHIFT_EXPR
)
4419 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4422 prev_stmt_info
= NULL
;
4426 for (j
= 0; j
< ncopies
; j
++)
4429 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4431 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4433 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4435 /* Arguments are ready, create the new vector stmt. */
4436 if (code1
== CALL_EXPR
)
4438 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4439 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4440 gimple_call_set_lhs (new_stmt
, new_temp
);
4444 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4445 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4446 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4447 gimple_assign_set_lhs (new_stmt
, new_temp
);
4450 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4452 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4455 if (!prev_stmt_info
)
4456 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4458 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4459 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4466 /* In case the vectorization factor (VF) is bigger than the number
4467 of elements that we can fit in a vectype (nunits), we have to
4468 generate more than one vector stmt - i.e - we need to "unroll"
4469 the vector stmt by a factor VF/nunits. */
4470 for (j
= 0; j
< ncopies
; j
++)
4477 if (code
== WIDEN_LSHIFT_EXPR
)
4482 /* Store vec_oprnd1 for every vector stmt to be created
4483 for SLP_NODE. We check during the analysis that all
4484 the shift arguments are the same. */
4485 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4486 vec_oprnds1
.quick_push (vec_oprnd1
);
4488 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4492 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4493 &vec_oprnds1
, slp_node
);
4497 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4498 vec_oprnds0
.quick_push (vec_oprnd0
);
4499 if (op_type
== binary_op
)
4501 if (code
== WIDEN_LSHIFT_EXPR
)
4504 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4505 vec_oprnds1
.quick_push (vec_oprnd1
);
4511 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4512 vec_oprnds0
.truncate (0);
4513 vec_oprnds0
.quick_push (vec_oprnd0
);
4514 if (op_type
== binary_op
)
4516 if (code
== WIDEN_LSHIFT_EXPR
)
4519 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4521 vec_oprnds1
.truncate (0);
4522 vec_oprnds1
.quick_push (vec_oprnd1
);
4526 /* Arguments are ready. Create the new vector stmts. */
4527 for (i
= multi_step_cvt
; i
>= 0; i
--)
4529 tree this_dest
= vec_dsts
[i
];
4530 enum tree_code c1
= code1
, c2
= code2
;
4531 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4536 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4538 stmt
, this_dest
, gsi
,
4539 c1
, c2
, decl1
, decl2
,
4543 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4547 if (codecvt1
== CALL_EXPR
)
4549 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4550 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4551 gimple_call_set_lhs (new_stmt
, new_temp
);
4555 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4556 new_temp
= make_ssa_name (vec_dest
);
4557 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4561 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4564 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4567 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4570 if (!prev_stmt_info
)
4571 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4573 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4574 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4579 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4583 /* In case the vectorization factor (VF) is bigger than the number
4584 of elements that we can fit in a vectype (nunits), we have to
4585 generate more than one vector stmt - i.e - we need to "unroll"
4586 the vector stmt by a factor VF/nunits. */
4587 for (j
= 0; j
< ncopies
; j
++)
4591 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4595 vec_oprnds0
.truncate (0);
4596 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4597 vect_pow2 (multi_step_cvt
) - 1);
4600 /* Arguments are ready. Create the new vector stmts. */
4602 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4604 if (codecvt1
== CALL_EXPR
)
4606 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4607 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4608 gimple_call_set_lhs (new_stmt
, new_temp
);
4612 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4613 new_temp
= make_ssa_name (vec_dest
);
4614 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4618 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4619 vec_oprnds0
[i
] = new_temp
;
4622 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4623 stmt
, vec_dsts
, gsi
,
4628 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4632 vec_oprnds0
.release ();
4633 vec_oprnds1
.release ();
4634 interm_types
.release ();
4640 /* Function vectorizable_assignment.
4642 Check if STMT performs an assignment (copy) that can be vectorized.
4643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4648 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4649 gimple
**vec_stmt
, slp_tree slp_node
)
4654 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4655 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4658 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4662 vec
<tree
> vec_oprnds
= vNULL
;
4664 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4665 vec_info
*vinfo
= stmt_info
->vinfo
;
4666 gimple
*new_stmt
= NULL
;
4667 stmt_vec_info prev_stmt_info
= NULL
;
4668 enum tree_code code
;
4671 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4674 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4678 /* Is vectorizable assignment? */
4679 if (!is_gimple_assign (stmt
))
4682 scalar_dest
= gimple_assign_lhs (stmt
);
4683 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4686 code
= gimple_assign_rhs_code (stmt
);
4687 if (gimple_assign_single_p (stmt
)
4688 || code
== PAREN_EXPR
4689 || CONVERT_EXPR_CODE_P (code
))
4690 op
= gimple_assign_rhs1 (stmt
);
4694 if (code
== VIEW_CONVERT_EXPR
)
4695 op
= TREE_OPERAND (op
, 0);
4697 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4698 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4700 /* Multiple types in SLP are handled by creating the appropriate number of
4701 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4706 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4708 gcc_assert (ncopies
>= 1);
4710 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4714 "use not simple.\n");
4718 /* We can handle NOP_EXPR conversions that do not change the number
4719 of elements or the vector size. */
4720 if ((CONVERT_EXPR_CODE_P (code
)
4721 || code
== VIEW_CONVERT_EXPR
)
4723 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4724 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4725 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4728 /* We do not handle bit-precision changes. */
4729 if ((CONVERT_EXPR_CODE_P (code
)
4730 || code
== VIEW_CONVERT_EXPR
)
4731 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4732 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
4733 || !type_has_mode_precision_p (TREE_TYPE (op
)))
4734 /* But a conversion that does not change the bit-pattern is ok. */
4735 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4736 > TYPE_PRECISION (TREE_TYPE (op
)))
4737 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4738 /* Conversion between boolean types of different sizes is
4739 a simple assignment in case their vectypes are same
4741 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4742 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4746 "type conversion to/from bit-precision "
4751 if (!vec_stmt
) /* transformation not required. */
4753 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_NOTE
, vect_location
,
4756 "=== vectorizable_assignment ===\n");
4757 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4766 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4769 for (j
= 0; j
< ncopies
; j
++)
4773 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
4775 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4777 /* Arguments are ready. create the new vector stmt. */
4778 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4780 if (CONVERT_EXPR_CODE_P (code
)
4781 || code
== VIEW_CONVERT_EXPR
)
4782 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4783 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4784 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4785 gimple_assign_set_lhs (new_stmt
, new_temp
);
4786 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4788 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4795 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4797 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4799 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4802 vec_oprnds
.release ();
4807 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4808 either as shift by a scalar or by a vector. */
4811 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4814 machine_mode vec_mode
;
4819 vectype
= get_vectype_for_scalar_type (scalar_type
);
4823 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4825 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4827 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4829 || (optab_handler (optab
, TYPE_MODE (vectype
))
4830 == CODE_FOR_nothing
))
4834 vec_mode
= TYPE_MODE (vectype
);
4835 icode
= (int) optab_handler (optab
, vec_mode
);
4836 if (icode
== CODE_FOR_nothing
)
4843 /* Function vectorizable_shift.
4845 Check if STMT performs a shift operation that can be vectorized.
4846 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4847 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4848 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4851 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4852 gimple
**vec_stmt
, slp_tree slp_node
)
4856 tree op0
, op1
= NULL
;
4857 tree vec_oprnd1
= NULL_TREE
;
4858 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4860 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4861 enum tree_code code
;
4862 machine_mode vec_mode
;
4866 machine_mode optab_op2_mode
;
4868 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4870 gimple
*new_stmt
= NULL
;
4871 stmt_vec_info prev_stmt_info
;
4878 vec
<tree
> vec_oprnds0
= vNULL
;
4879 vec
<tree
> vec_oprnds1
= vNULL
;
4882 bool scalar_shift_arg
= true;
4883 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4884 vec_info
*vinfo
= stmt_info
->vinfo
;
4886 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4889 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4893 /* Is STMT a vectorizable binary/unary operation? */
4894 if (!is_gimple_assign (stmt
))
4897 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4900 code
= gimple_assign_rhs_code (stmt
);
4902 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4903 || code
== RROTATE_EXPR
))
4906 scalar_dest
= gimple_assign_lhs (stmt
);
4907 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4908 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
4910 if (dump_enabled_p ())
4911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4912 "bit-precision shifts not supported.\n");
4916 op0
= gimple_assign_rhs1 (stmt
);
4917 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4919 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4921 "use not simple.\n");
4924 /* If op0 is an external or constant def use a vector type with
4925 the same size as the output vector type. */
4927 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4929 gcc_assert (vectype
);
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4934 "no vectype for scalar type\n");
4938 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4939 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4940 if (nunits_out
!= nunits_in
)
4943 op1
= gimple_assign_rhs2 (stmt
);
4944 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4946 if (dump_enabled_p ())
4947 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4948 "use not simple.\n");
4952 /* Multiple types in SLP are handled by creating the appropriate number of
4953 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4958 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4960 gcc_assert (ncopies
>= 1);
4962 /* Determine whether the shift amount is a vector, or scalar. If the
4963 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4965 if ((dt
[1] == vect_internal_def
4966 || dt
[1] == vect_induction_def
)
4968 scalar_shift_arg
= false;
4969 else if (dt
[1] == vect_constant_def
4970 || dt
[1] == vect_external_def
4971 || dt
[1] == vect_internal_def
)
4973 /* In SLP, need to check whether the shift count is the same,
4974 in loops if it is a constant or invariant, it is always
4978 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4981 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4982 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4983 scalar_shift_arg
= false;
4986 /* If the shift amount is computed by a pattern stmt we cannot
4987 use the scalar amount directly thus give up and use a vector
4989 if (dt
[1] == vect_internal_def
)
4991 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4992 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4993 scalar_shift_arg
= false;
4998 if (dump_enabled_p ())
4999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5000 "operand mode requires invariant argument.\n");
5004 /* Vector shifted by vector. */
5005 if (!scalar_shift_arg
)
5007 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE
, vect_location
,
5010 "vector/vector shift/rotate found.\n");
5013 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5014 if (op1_vectype
== NULL_TREE
5015 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5019 "unusable type for last operand in"
5020 " vector/vector shift/rotate.\n");
5024 /* See if the machine has a vector shifted by scalar insn and if not
5025 then see if it has a vector shifted by vector insn. */
5028 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5030 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_NOTE
, vect_location
,
5034 "vector/scalar shift/rotate found.\n");
5038 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5040 && (optab_handler (optab
, TYPE_MODE (vectype
))
5041 != CODE_FOR_nothing
))
5043 scalar_shift_arg
= false;
5045 if (dump_enabled_p ())
5046 dump_printf_loc (MSG_NOTE
, vect_location
,
5047 "vector/vector shift/rotate found.\n");
5049 /* Unlike the other binary operators, shifts/rotates have
5050 the rhs being int, instead of the same type as the lhs,
5051 so make sure the scalar is the right type if we are
5052 dealing with vectors of long long/long/short/char. */
5053 if (dt
[1] == vect_constant_def
)
5054 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5055 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5059 && TYPE_MODE (TREE_TYPE (vectype
))
5060 != TYPE_MODE (TREE_TYPE (op1
)))
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5064 "unusable type for last operand in"
5065 " vector/vector shift/rotate.\n");
5068 if (vec_stmt
&& !slp_node
)
5070 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5071 op1
= vect_init_vector (stmt
, op1
,
5072 TREE_TYPE (vectype
), NULL
);
5079 /* Supportable by target? */
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5087 vec_mode
= TYPE_MODE (vectype
);
5088 icode
= (int) optab_handler (optab
, vec_mode
);
5089 if (icode
== CODE_FOR_nothing
)
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5093 "op not supported by target.\n");
5094 /* Check only during analysis. */
5095 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5097 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_NOTE
, vect_location
,
5101 "proceeding using word mode.\n");
5104 /* Worthwhile without SIMD support? Check only during analysis. */
5106 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5107 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5109 if (dump_enabled_p ())
5110 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5111 "not worthwhile without SIMD support.\n");
5115 if (!vec_stmt
) /* transformation not required. */
5117 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_NOTE
, vect_location
,
5120 "=== vectorizable_shift ===\n");
5121 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_NOTE
, vect_location
,
5129 "transform binary/unary operation.\n");
5132 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5134 prev_stmt_info
= NULL
;
5135 for (j
= 0; j
< ncopies
; j
++)
5140 if (scalar_shift_arg
)
5142 /* Vector shl and shr insn patterns can be defined with scalar
5143 operand 2 (shift operand). In this case, use constant or loop
5144 invariant op1 directly, without extending it to vector mode
5146 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5147 if (!VECTOR_MODE_P (optab_op2_mode
))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_NOTE
, vect_location
,
5151 "operand 1 using scalar mode.\n");
5153 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5154 vec_oprnds1
.quick_push (vec_oprnd1
);
5157 /* Store vec_oprnd1 for every vector stmt to be created
5158 for SLP_NODE. We check during the analysis that all
5159 the shift arguments are the same.
5160 TODO: Allow different constants for different vector
5161 stmts generated for an SLP instance. */
5162 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5163 vec_oprnds1
.quick_push (vec_oprnd1
);
5168 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5169 (a special case for certain kind of vector shifts); otherwise,
5170 operand 1 should be of a vector type (the usual case). */
5172 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5175 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5179 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5181 /* Arguments are ready. Create the new vector stmt. */
5182 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5184 vop1
= vec_oprnds1
[i
];
5185 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5186 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5187 gimple_assign_set_lhs (new_stmt
, new_temp
);
5188 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5190 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5197 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5199 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5200 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5203 vec_oprnds0
.release ();
5204 vec_oprnds1
.release ();
5210 /* Function vectorizable_operation.
5212 Check if STMT performs a binary, unary or ternary operation that can
5214 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5215 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5216 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5219 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5220 gimple
**vec_stmt
, slp_tree slp_node
)
5224 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5225 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5227 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5228 enum tree_code code
;
5229 machine_mode vec_mode
;
5233 bool target_support_p
;
5235 enum vect_def_type dt
[3]
5236 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5238 gimple
*new_stmt
= NULL
;
5239 stmt_vec_info prev_stmt_info
;
5245 vec
<tree
> vec_oprnds0
= vNULL
;
5246 vec
<tree
> vec_oprnds1
= vNULL
;
5247 vec
<tree
> vec_oprnds2
= vNULL
;
5248 tree vop0
, vop1
, vop2
;
5249 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5250 vec_info
*vinfo
= stmt_info
->vinfo
;
5252 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5255 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5259 /* Is STMT a vectorizable binary/unary operation? */
5260 if (!is_gimple_assign (stmt
))
5263 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5266 code
= gimple_assign_rhs_code (stmt
);
5268 /* For pointer addition and subtraction, we should use the normal
5269 plus and minus for the vector operation. */
5270 if (code
== POINTER_PLUS_EXPR
)
5272 if (code
== POINTER_DIFF_EXPR
)
5275 /* Support only unary or binary operations. */
5276 op_type
= TREE_CODE_LENGTH (code
);
5277 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5281 "num. args = %d (not unary/binary/ternary op).\n",
5286 scalar_dest
= gimple_assign_lhs (stmt
);
5287 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5289 /* Most operations cannot handle bit-precision types without extra
5291 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5292 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5293 /* Exception are bitwise binary operations. */
5294 && code
!= BIT_IOR_EXPR
5295 && code
!= BIT_XOR_EXPR
5296 && code
!= BIT_AND_EXPR
)
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5300 "bit-precision arithmetic not supported.\n");
5304 op0
= gimple_assign_rhs1 (stmt
);
5305 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5307 if (dump_enabled_p ())
5308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5309 "use not simple.\n");
5312 /* If op0 is an external or constant def use a vector type with
5313 the same size as the output vector type. */
5316 /* For boolean type we cannot determine vectype by
5317 invariant value (don't know whether it is a vector
5318 of booleans or vector of integers). We use output
5319 vectype because operations on boolean don't change
5321 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5323 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5325 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5327 "not supported operation on bool value.\n");
5330 vectype
= vectype_out
;
5333 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5336 gcc_assert (vectype
);
5339 if (dump_enabled_p ())
5341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5342 "no vectype for scalar type ");
5343 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5345 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5351 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5352 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5353 if (nunits_out
!= nunits_in
)
5356 if (op_type
== binary_op
|| op_type
== ternary_op
)
5358 op1
= gimple_assign_rhs2 (stmt
);
5359 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5361 if (dump_enabled_p ())
5362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5363 "use not simple.\n");
5367 if (op_type
== ternary_op
)
5369 op2
= gimple_assign_rhs3 (stmt
);
5370 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5374 "use not simple.\n");
5379 /* Multiple types in SLP are handled by creating the appropriate number of
5380 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5385 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5387 gcc_assert (ncopies
>= 1);
5389 /* Shifts are handled in vectorizable_shift (). */
5390 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5391 || code
== RROTATE_EXPR
)
5394 /* Supportable by target? */
5396 vec_mode
= TYPE_MODE (vectype
);
5397 if (code
== MULT_HIGHPART_EXPR
)
5398 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5401 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5409 target_support_p
= (optab_handler (optab
, vec_mode
)
5410 != CODE_FOR_nothing
);
5413 if (!target_support_p
)
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5417 "op not supported by target.\n");
5418 /* Check only during analysis. */
5419 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5420 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_NOTE
, vect_location
,
5424 "proceeding using word mode.\n");
5427 /* Worthwhile without SIMD support? Check only during analysis. */
5428 if (!VECTOR_MODE_P (vec_mode
)
5430 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5434 "not worthwhile without SIMD support.\n");
5438 if (!vec_stmt
) /* transformation not required. */
5440 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_NOTE
, vect_location
,
5443 "=== vectorizable_operation ===\n");
5444 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5450 if (dump_enabled_p ())
5451 dump_printf_loc (MSG_NOTE
, vect_location
,
5452 "transform binary/unary operation.\n");
5455 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5457 /* In case the vectorization factor (VF) is bigger than the number
5458 of elements that we can fit in a vectype (nunits), we have to generate
5459 more than one vector stmt - i.e - we need to "unroll" the
5460 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5461 from one copy of the vector stmt to the next, in the field
5462 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5463 stages to find the correct vector defs to be used when vectorizing
5464 stmts that use the defs of the current stmt. The example below
5465 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5466 we need to create 4 vectorized stmts):
5468 before vectorization:
5469 RELATED_STMT VEC_STMT
5473 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5475 RELATED_STMT VEC_STMT
5476 VS1_0: vx0 = memref0 VS1_1 -
5477 VS1_1: vx1 = memref1 VS1_2 -
5478 VS1_2: vx2 = memref2 VS1_3 -
5479 VS1_3: vx3 = memref3 - -
5480 S1: x = load - VS1_0
5483 step2: vectorize stmt S2 (done here):
5484 To vectorize stmt S2 we first need to find the relevant vector
5485 def for the first operand 'x'. This is, as usual, obtained from
5486 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5487 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5488 relevant vector def 'vx0'. Having found 'vx0' we can generate
5489 the vector stmt VS2_0, and as usual, record it in the
5490 STMT_VINFO_VEC_STMT of stmt S2.
5491 When creating the second copy (VS2_1), we obtain the relevant vector
5492 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5493 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5494 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5495 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5496 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5497 chain of stmts and pointers:
5498 RELATED_STMT VEC_STMT
5499 VS1_0: vx0 = memref0 VS1_1 -
5500 VS1_1: vx1 = memref1 VS1_2 -
5501 VS1_2: vx2 = memref2 VS1_3 -
5502 VS1_3: vx3 = memref3 - -
5503 S1: x = load - VS1_0
5504 VS2_0: vz0 = vx0 + v1 VS2_1 -
5505 VS2_1: vz1 = vx1 + v1 VS2_2 -
5506 VS2_2: vz2 = vx2 + v1 VS2_3 -
5507 VS2_3: vz3 = vx3 + v1 - -
5508 S2: z = x + 1 - VS2_0 */
5510 prev_stmt_info
= NULL
;
5511 for (j
= 0; j
< ncopies
; j
++)
5516 if (op_type
== binary_op
|| op_type
== ternary_op
)
5517 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5520 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5522 if (op_type
== ternary_op
)
5523 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5528 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5529 if (op_type
== ternary_op
)
5531 tree vec_oprnd
= vec_oprnds2
.pop ();
5532 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5537 /* Arguments are ready. Create the new vector stmt. */
5538 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5540 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5541 ? vec_oprnds1
[i
] : NULL_TREE
);
5542 vop2
= ((op_type
== ternary_op
)
5543 ? vec_oprnds2
[i
] : NULL_TREE
);
5544 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5545 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5546 gimple_assign_set_lhs (new_stmt
, new_temp
);
5547 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5549 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5556 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5558 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5559 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5562 vec_oprnds0
.release ();
5563 vec_oprnds1
.release ();
5564 vec_oprnds2
.release ();
5569 /* A helper function to ensure data reference DR's base alignment. */
5572 ensure_base_align (struct data_reference
*dr
)
5577 if (DR_VECT_AUX (dr
)->base_misaligned
)
5579 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5581 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
5583 if (decl_in_symtab_p (base_decl
))
5584 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
5587 SET_DECL_ALIGN (base_decl
, align_base_to
);
5588 DECL_USER_ALIGN (base_decl
) = 1;
5590 DR_VECT_AUX (dr
)->base_misaligned
= false;
5595 /* Function get_group_alias_ptr_type.
5597 Return the alias type for the group starting at FIRST_STMT. */
5600 get_group_alias_ptr_type (gimple
*first_stmt
)
5602 struct data_reference
*first_dr
, *next_dr
;
5605 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5606 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5609 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5610 if (get_alias_set (DR_REF (first_dr
))
5611 != get_alias_set (DR_REF (next_dr
)))
5613 if (dump_enabled_p ())
5614 dump_printf_loc (MSG_NOTE
, vect_location
,
5615 "conflicting alias set types.\n");
5616 return ptr_type_node
;
5618 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5620 return reference_alias_ptr_type (DR_REF (first_dr
));
5624 /* Function vectorizable_store.
5626 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5628 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5629 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5630 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5633 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5639 tree vec_oprnd
= NULL_TREE
;
5640 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5641 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5643 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5644 struct loop
*loop
= NULL
;
5645 machine_mode vec_mode
;
5647 enum dr_alignment_support alignment_support_scheme
;
5649 enum vect_def_type dt
;
5650 stmt_vec_info prev_stmt_info
= NULL
;
5651 tree dataref_ptr
= NULL_TREE
;
5652 tree dataref_offset
= NULL_TREE
;
5653 gimple
*ptr_incr
= NULL
;
5656 gimple
*next_stmt
, *first_stmt
;
5658 unsigned int group_size
, i
;
5659 vec
<tree
> oprnds
= vNULL
;
5660 vec
<tree
> result_chain
= vNULL
;
5662 tree offset
= NULL_TREE
;
5663 vec
<tree
> vec_oprnds
= vNULL
;
5664 bool slp
= (slp_node
!= NULL
);
5665 unsigned int vec_num
;
5666 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5667 vec_info
*vinfo
= stmt_info
->vinfo
;
5669 gather_scatter_info gs_info
;
5670 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5673 vec_load_store_type vls_type
;
5676 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5679 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5683 /* Is vectorizable store? */
5685 if (!is_gimple_assign (stmt
))
5688 scalar_dest
= gimple_assign_lhs (stmt
);
5689 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5690 && is_pattern_stmt_p (stmt_info
))
5691 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5692 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5693 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5694 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5695 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5696 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5697 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5698 && TREE_CODE (scalar_dest
) != MEM_REF
)
5701 /* Cannot have hybrid store SLP -- that would mean storing to the
5702 same location twice. */
5703 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5705 gcc_assert (gimple_assign_single_p (stmt
));
5707 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5708 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5712 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5713 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5718 /* Multiple types in SLP are handled by creating the appropriate number of
5719 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5724 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5726 gcc_assert (ncopies
>= 1);
5728 /* FORNOW. This restriction should be relaxed. */
5729 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5733 "multiple types in nested loop.\n");
5737 op
= gimple_assign_rhs1 (stmt
);
5739 /* In the case this is a store from a constant make sure
5740 native_encode_expr can handle it. */
5741 if (CONSTANT_CLASS_P (op
) && native_encode_expr (op
, NULL
, 64) == 0)
5744 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5748 "use not simple.\n");
5752 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5753 vls_type
= VLS_STORE_INVARIANT
;
5755 vls_type
= VLS_STORE
;
5757 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5760 elem_type
= TREE_TYPE (vectype
);
5761 vec_mode
= TYPE_MODE (vectype
);
5763 /* FORNOW. In some cases can vectorize even if data-type not supported
5764 (e.g. - array initialization with 0). */
5765 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5768 if (!STMT_VINFO_DATA_REF (stmt_info
))
5771 vect_memory_access_type memory_access_type
;
5772 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5773 &memory_access_type
, &gs_info
))
5776 if (!vec_stmt
) /* transformation not required. */
5778 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5779 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5780 /* The SLP costs are calculated during SLP analysis. */
5781 if (!PURE_SLP_STMT (stmt_info
))
5782 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5786 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5790 ensure_base_align (dr
);
5792 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5794 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5795 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5796 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5797 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5798 edge pe
= loop_preheader_edge (loop
);
5801 enum { NARROW
, NONE
, WIDEN
} modifier
;
5802 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5804 if (nunits
== (unsigned int) scatter_off_nunits
)
5806 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5810 auto_vec_perm_indices
sel (scatter_off_nunits
);
5811 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5812 sel
.quick_push (i
| nunits
);
5814 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5815 gcc_assert (perm_mask
!= NULL_TREE
);
5817 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5821 auto_vec_perm_indices
sel (nunits
);
5822 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5823 sel
.quick_push (i
| scatter_off_nunits
);
5825 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5826 gcc_assert (perm_mask
!= NULL_TREE
);
5832 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5833 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5834 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5835 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5836 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5837 scaletype
= TREE_VALUE (arglist
);
5839 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5840 && TREE_CODE (rettype
) == VOID_TYPE
);
5842 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5843 if (!is_gimple_min_invariant (ptr
))
5845 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5846 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5847 gcc_assert (!new_bb
);
5850 /* Currently we support only unconditional scatter stores,
5851 so mask should be all ones. */
5852 mask
= build_int_cst (masktype
, -1);
5853 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5855 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5857 prev_stmt_info
= NULL
;
5858 for (j
= 0; j
< ncopies
; ++j
)
5863 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5865 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5867 else if (modifier
!= NONE
&& (j
& 1))
5869 if (modifier
== WIDEN
)
5872 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5873 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5876 else if (modifier
== NARROW
)
5878 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5881 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5890 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5892 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5896 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5898 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5899 == TYPE_VECTOR_SUBPARTS (srctype
));
5900 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5901 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5902 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5903 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5907 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5909 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5910 == TYPE_VECTOR_SUBPARTS (idxtype
));
5911 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5912 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5913 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5914 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5919 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5921 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5923 if (prev_stmt_info
== NULL
)
5924 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5926 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5927 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5932 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5935 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5936 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5937 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5939 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5942 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5944 /* We vectorize all the stmts of the interleaving group when we
5945 reach the last stmt in the group. */
5946 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5947 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5956 grouped_store
= false;
5957 /* VEC_NUM is the number of vect stmts to be created for this
5959 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5960 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5961 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5962 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5963 op
= gimple_assign_rhs1 (first_stmt
);
5966 /* VEC_NUM is the number of vect stmts to be created for this
5968 vec_num
= group_size
;
5970 ref_type
= get_group_alias_ptr_type (first_stmt
);
5976 group_size
= vec_num
= 1;
5977 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5980 if (dump_enabled_p ())
5981 dump_printf_loc (MSG_NOTE
, vect_location
,
5982 "transform store. ncopies = %d\n", ncopies
);
5984 if (memory_access_type
== VMAT_ELEMENTWISE
5985 || memory_access_type
== VMAT_STRIDED_SLP
)
5987 gimple_stmt_iterator incr_gsi
;
5993 gimple_seq stmts
= NULL
;
5994 tree stride_base
, stride_step
, alias_off
;
5998 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6001 = fold_build_pointer_plus
6002 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6003 size_binop (PLUS_EXPR
,
6004 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6005 convert_to_ptrofftype (DR_INIT (first_dr
))));
6006 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
6008 /* For a store with loop-invariant (but other than power-of-2)
6009 stride (i.e. not a grouped access) like so:
6011 for (i = 0; i < n; i += stride)
6014 we generate a new induction variable and new stores from
6015 the components of the (vectorized) rhs:
6017 for (j = 0; ; j += VF*stride)
6022 array[j + stride] = tmp2;
6026 unsigned nstores
= nunits
;
6028 tree ltype
= elem_type
;
6029 tree lvectype
= vectype
;
6032 if (group_size
< nunits
6033 && nunits
% group_size
== 0)
6035 nstores
= nunits
/ group_size
;
6037 ltype
= build_vector_type (elem_type
, group_size
);
6040 /* First check if vec_extract optab doesn't support extraction
6041 of vector elts directly. */
6042 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6044 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6045 || !VECTOR_MODE_P (vmode
)
6046 || (convert_optab_handler (vec_extract_optab
,
6047 TYPE_MODE (vectype
), vmode
)
6048 == CODE_FOR_nothing
))
6050 /* Try to avoid emitting an extract of vector elements
6051 by performing the extracts using an integer type of the
6052 same size, extracting from a vector of those and then
6053 re-interpreting it as the original vector type if
6056 = group_size
* GET_MODE_BITSIZE (elmode
);
6057 elmode
= int_mode_for_size (lsize
, 0).require ();
6058 /* If we can't construct such a vector fall back to
6059 element extracts from the original vector type and
6060 element size stores. */
6061 if (mode_for_vector (elmode
,
6062 nunits
/ group_size
).exists (&vmode
)
6063 && VECTOR_MODE_P (vmode
)
6064 && (convert_optab_handler (vec_extract_optab
,
6066 != CODE_FOR_nothing
))
6068 nstores
= nunits
/ group_size
;
6070 ltype
= build_nonstandard_integer_type (lsize
, 1);
6071 lvectype
= build_vector_type (ltype
, nstores
);
6073 /* Else fall back to vector extraction anyway.
6074 Fewer stores are more important than avoiding spilling
6075 of the vector we extract from. Compared to the
6076 construction case in vectorizable_load no store-forwarding
6077 issue exists here for reasonable archs. */
6080 else if (group_size
>= nunits
6081 && group_size
% nunits
== 0)
6088 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6089 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6092 ivstep
= stride_step
;
6093 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6094 build_int_cst (TREE_TYPE (ivstep
), vf
));
6096 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6098 create_iv (stride_base
, ivstep
, NULL
,
6099 loop
, &incr_gsi
, insert_after
,
6101 incr
= gsi_stmt (incr_gsi
);
6102 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6104 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6106 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6108 prev_stmt_info
= NULL
;
6109 alias_off
= build_int_cst (ref_type
, 0);
6110 next_stmt
= first_stmt
;
6111 for (g
= 0; g
< group_size
; g
++)
6113 running_off
= offvar
;
6116 tree size
= TYPE_SIZE_UNIT (ltype
);
6117 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6119 tree newoff
= copy_ssa_name (running_off
, NULL
);
6120 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6122 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6123 running_off
= newoff
;
6125 unsigned int group_el
= 0;
6126 unsigned HOST_WIDE_INT
6127 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6128 for (j
= 0; j
< ncopies
; j
++)
6130 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6131 and first_stmt == stmt. */
6136 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6138 vec_oprnd
= vec_oprnds
[0];
6142 gcc_assert (gimple_assign_single_p (next_stmt
));
6143 op
= gimple_assign_rhs1 (next_stmt
);
6144 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6150 vec_oprnd
= vec_oprnds
[j
];
6153 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6154 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6157 /* Pun the vector to extract from if necessary. */
6158 if (lvectype
!= vectype
)
6160 tree tem
= make_ssa_name (lvectype
);
6162 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6163 lvectype
, vec_oprnd
));
6164 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6167 for (i
= 0; i
< nstores
; i
++)
6169 tree newref
, newoff
;
6170 gimple
*incr
, *assign
;
6171 tree size
= TYPE_SIZE (ltype
);
6172 /* Extract the i'th component. */
6173 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6174 bitsize_int (i
), size
);
6175 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6178 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6182 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6184 newref
= build2 (MEM_REF
, ltype
,
6185 running_off
, this_off
);
6187 /* And store it to *running_off. */
6188 assign
= gimple_build_assign (newref
, elem
);
6189 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6193 || group_el
== group_size
)
6195 newoff
= copy_ssa_name (running_off
, NULL
);
6196 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6197 running_off
, stride_step
);
6198 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6200 running_off
= newoff
;
6203 if (g
== group_size
- 1
6206 if (j
== 0 && i
== 0)
6207 STMT_VINFO_VEC_STMT (stmt_info
)
6208 = *vec_stmt
= assign
;
6210 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6211 prev_stmt_info
= vinfo_for_stmt (assign
);
6215 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6220 vec_oprnds
.release ();
6224 auto_vec
<tree
> dr_chain (group_size
);
6225 oprnds
.create (group_size
);
6227 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6228 gcc_assert (alignment_support_scheme
);
6229 /* Targets with store-lane instructions must not require explicit
6231 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6232 || alignment_support_scheme
== dr_aligned
6233 || alignment_support_scheme
== dr_unaligned_supported
);
6235 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6236 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6237 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6239 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6240 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6242 aggr_type
= vectype
;
6244 /* In case the vectorization factor (VF) is bigger than the number
6245 of elements that we can fit in a vectype (nunits), we have to generate
6246 more than one vector stmt - i.e - we need to "unroll" the
6247 vector stmt by a factor VF/nunits. For more details see documentation in
6248 vect_get_vec_def_for_copy_stmt. */
6250 /* In case of interleaving (non-unit grouped access):
6257 We create vectorized stores starting from base address (the access of the
6258 first stmt in the chain (S2 in the above example), when the last store stmt
6259 of the chain (S4) is reached:
6262 VS2: &base + vec_size*1 = vx0
6263 VS3: &base + vec_size*2 = vx1
6264 VS4: &base + vec_size*3 = vx3
6266 Then permutation statements are generated:
6268 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6269 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6272 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6273 (the order of the data-refs in the output of vect_permute_store_chain
6274 corresponds to the order of scalar stmts in the interleaving chain - see
6275 the documentation of vect_permute_store_chain()).
6277 In case of both multiple types and interleaving, above vector stores and
6278 permutation stmts are created for every copy. The result vector stmts are
6279 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6280 STMT_VINFO_RELATED_STMT for the next copies.
6283 prev_stmt_info
= NULL
;
6284 for (j
= 0; j
< ncopies
; j
++)
6291 /* Get vectorized arguments for SLP_NODE. */
6292 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6295 vec_oprnd
= vec_oprnds
[0];
6299 /* For interleaved stores we collect vectorized defs for all the
6300 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6301 used as an input to vect_permute_store_chain(), and OPRNDS as
6302 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6304 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6305 OPRNDS are of size 1. */
6306 next_stmt
= first_stmt
;
6307 for (i
= 0; i
< group_size
; i
++)
6309 /* Since gaps are not supported for interleaved stores,
6310 GROUP_SIZE is the exact number of stmts in the chain.
6311 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6312 there is no interleaving, GROUP_SIZE is 1, and only one
6313 iteration of the loop will be executed. */
6314 gcc_assert (next_stmt
6315 && gimple_assign_single_p (next_stmt
));
6316 op
= gimple_assign_rhs1 (next_stmt
);
6318 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6319 dr_chain
.quick_push (vec_oprnd
);
6320 oprnds
.quick_push (vec_oprnd
);
6321 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6325 /* We should have catched mismatched types earlier. */
6326 gcc_assert (useless_type_conversion_p (vectype
,
6327 TREE_TYPE (vec_oprnd
)));
6328 bool simd_lane_access_p
6329 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6330 if (simd_lane_access_p
6331 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6332 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6333 && integer_zerop (DR_OFFSET (first_dr
))
6334 && integer_zerop (DR_INIT (first_dr
))
6335 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6336 get_alias_set (TREE_TYPE (ref_type
))))
6338 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6339 dataref_offset
= build_int_cst (ref_type
, 0);
6344 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6345 simd_lane_access_p
? loop
: NULL
,
6346 offset
, &dummy
, gsi
, &ptr_incr
,
6347 simd_lane_access_p
, &inv_p
);
6348 gcc_assert (bb_vinfo
|| !inv_p
);
6352 /* For interleaved stores we created vectorized defs for all the
6353 defs stored in OPRNDS in the previous iteration (previous copy).
6354 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6355 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6357 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6358 OPRNDS are of size 1. */
6359 for (i
= 0; i
< group_size
; i
++)
6362 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6363 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6364 dr_chain
[i
] = vec_oprnd
;
6365 oprnds
[i
] = vec_oprnd
;
6369 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6370 TYPE_SIZE_UNIT (aggr_type
));
6372 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6373 TYPE_SIZE_UNIT (aggr_type
));
6376 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6380 /* Combine all the vectors into an array. */
6381 vec_array
= create_vector_array (vectype
, vec_num
);
6382 for (i
= 0; i
< vec_num
; i
++)
6384 vec_oprnd
= dr_chain
[i
];
6385 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6389 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6390 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6391 gcall
*call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6393 gimple_call_set_lhs (call
, data_ref
);
6394 gimple_call_set_nothrow (call
, true);
6396 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6404 result_chain
.create (group_size
);
6406 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6410 next_stmt
= first_stmt
;
6411 for (i
= 0; i
< vec_num
; i
++)
6413 unsigned align
, misalign
;
6416 /* Bump the vector pointer. */
6417 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6421 vec_oprnd
= vec_oprnds
[i
];
6422 else if (grouped_store
)
6423 /* For grouped stores vectorized defs are interleaved in
6424 vect_permute_store_chain(). */
6425 vec_oprnd
= result_chain
[i
];
6427 data_ref
= fold_build2 (MEM_REF
, vectype
,
6431 : build_int_cst (ref_type
, 0));
6432 align
= DR_TARGET_ALIGNMENT (first_dr
);
6433 if (aligned_access_p (first_dr
))
6435 else if (DR_MISALIGNMENT (first_dr
) == -1)
6437 align
= dr_alignment (vect_dr_behavior (first_dr
));
6439 TREE_TYPE (data_ref
)
6440 = build_aligned_type (TREE_TYPE (data_ref
),
6441 align
* BITS_PER_UNIT
);
6445 TREE_TYPE (data_ref
)
6446 = build_aligned_type (TREE_TYPE (data_ref
),
6447 TYPE_ALIGN (elem_type
));
6448 misalign
= DR_MISALIGNMENT (first_dr
);
6450 if (dataref_offset
== NULL_TREE
6451 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6452 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6455 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6457 tree perm_mask
= perm_mask_for_reverse (vectype
);
6459 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6461 tree new_temp
= make_ssa_name (perm_dest
);
6463 /* Generate the permute statement. */
6465 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6466 vec_oprnd
, perm_mask
);
6467 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6469 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6470 vec_oprnd
= new_temp
;
6473 /* Arguments are ready. Create the new vector stmt. */
6474 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6475 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6480 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6488 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6490 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6491 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6496 result_chain
.release ();
6497 vec_oprnds
.release ();
6502 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6503 VECTOR_CST mask. No checks are made that the target platform supports the
6504 mask, so callers may wish to test can_vec_perm_p separately, or use
6505 vect_gen_perm_mask_checked. */
6508 vect_gen_perm_mask_any (tree vectype
, vec_perm_indices sel
)
6510 tree mask_elt_type
, mask_type
, mask_vec
;
6512 unsigned int nunits
= sel
.length ();
6513 gcc_checking_assert (nunits
== TYPE_VECTOR_SUBPARTS (vectype
));
6515 mask_elt_type
= lang_hooks
.types
.type_for_mode
6516 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
6517 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6519 auto_vec
<tree
, 32> mask_elts (nunits
);
6520 for (unsigned int i
= 0; i
< nunits
; ++i
)
6521 mask_elts
.quick_push (build_int_cst (mask_elt_type
, sel
[i
]));
6522 mask_vec
= build_vector (mask_type
, mask_elts
);
6527 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6528 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6531 vect_gen_perm_mask_checked (tree vectype
, vec_perm_indices sel
)
6533 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, &sel
));
6534 return vect_gen_perm_mask_any (vectype
, sel
);
6537 /* Given a vector variable X and Y, that was generated for the scalar
6538 STMT, generate instructions to permute the vector elements of X and Y
6539 using permutation mask MASK_VEC, insert them at *GSI and return the
6540 permuted vector variable. */
6543 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6544 gimple_stmt_iterator
*gsi
)
6546 tree vectype
= TREE_TYPE (x
);
6547 tree perm_dest
, data_ref
;
6550 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6551 data_ref
= make_ssa_name (perm_dest
);
6553 /* Generate the permute statement. */
6554 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6555 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6560 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6561 inserting them on the loops preheader edge. Returns true if we
6562 were successful in doing so (and thus STMT can be moved then),
6563 otherwise returns false. */
6566 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6572 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6574 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6575 if (!gimple_nop_p (def_stmt
)
6576 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6578 /* Make sure we don't need to recurse. While we could do
6579 so in simple cases when there are more complex use webs
6580 we don't have an easy way to preserve stmt order to fulfil
6581 dependencies within them. */
6584 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6586 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6588 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6589 if (!gimple_nop_p (def_stmt2
)
6590 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6600 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6602 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6603 if (!gimple_nop_p (def_stmt
)
6604 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6606 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6607 gsi_remove (&gsi
, false);
6608 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6615 /* vectorizable_load.
6617 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6619 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6620 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6621 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6624 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6625 slp_tree slp_node
, slp_instance slp_node_instance
)
6628 tree vec_dest
= NULL
;
6629 tree data_ref
= NULL
;
6630 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6631 stmt_vec_info prev_stmt_info
;
6632 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6633 struct loop
*loop
= NULL
;
6634 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6635 bool nested_in_vect_loop
= false;
6636 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6640 gimple
*new_stmt
= NULL
;
6642 enum dr_alignment_support alignment_support_scheme
;
6643 tree dataref_ptr
= NULL_TREE
;
6644 tree dataref_offset
= NULL_TREE
;
6645 gimple
*ptr_incr
= NULL
;
6647 int i
, j
, group_size
, group_gap_adj
;
6648 tree msq
= NULL_TREE
, lsq
;
6649 tree offset
= NULL_TREE
;
6650 tree byte_offset
= NULL_TREE
;
6651 tree realignment_token
= NULL_TREE
;
6653 vec
<tree
> dr_chain
= vNULL
;
6654 bool grouped_load
= false;
6656 gimple
*first_stmt_for_drptr
= NULL
;
6658 bool compute_in_loop
= false;
6659 struct loop
*at_loop
;
6661 bool slp
= (slp_node
!= NULL
);
6662 bool slp_perm
= false;
6663 enum tree_code code
;
6664 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6667 gather_scatter_info gs_info
;
6668 vec_info
*vinfo
= stmt_info
->vinfo
;
6671 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6674 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6678 /* Is vectorizable load? */
6679 if (!is_gimple_assign (stmt
))
6682 scalar_dest
= gimple_assign_lhs (stmt
);
6683 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6686 code
= gimple_assign_rhs_code (stmt
);
6687 if (code
!= ARRAY_REF
6688 && code
!= BIT_FIELD_REF
6689 && code
!= INDIRECT_REF
6690 && code
!= COMPONENT_REF
6691 && code
!= IMAGPART_EXPR
6692 && code
!= REALPART_EXPR
6694 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6697 if (!STMT_VINFO_DATA_REF (stmt_info
))
6700 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6701 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6705 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6706 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6707 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6712 /* Multiple types in SLP are handled by creating the appropriate number of
6713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6718 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6720 gcc_assert (ncopies
>= 1);
6722 /* FORNOW. This restriction should be relaxed. */
6723 if (nested_in_vect_loop
&& ncopies
> 1)
6725 if (dump_enabled_p ())
6726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6727 "multiple types in nested loop.\n");
6731 /* Invalidate assumptions made by dependence analysis when vectorization
6732 on the unrolled body effectively re-orders stmts. */
6734 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6735 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6736 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6738 if (dump_enabled_p ())
6739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6740 "cannot perform implicit CSE when unrolling "
6741 "with negative dependence distance\n");
6745 elem_type
= TREE_TYPE (vectype
);
6746 mode
= TYPE_MODE (vectype
);
6748 /* FORNOW. In some cases can vectorize even if data-type not supported
6749 (e.g. - data copies). */
6750 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6752 if (dump_enabled_p ())
6753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6754 "Aligned load, but unsupported type.\n");
6758 /* Check if the load is a part of an interleaving chain. */
6759 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6761 grouped_load
= true;
6763 gcc_assert (!nested_in_vect_loop
);
6764 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6766 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6767 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6769 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6772 /* Invalidate assumptions made by dependence analysis when vectorization
6773 on the unrolled body effectively re-orders stmts. */
6774 if (!PURE_SLP_STMT (stmt_info
)
6775 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6776 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6777 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6779 if (dump_enabled_p ())
6780 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6781 "cannot perform implicit CSE when performing "
6782 "group loads with negative dependence distance\n");
6786 /* Similarly when the stmt is a load that is both part of a SLP
6787 instance and a loop vectorized stmt via the same-dr mechanism
6788 we have to give up. */
6789 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6790 && (STMT_SLP_TYPE (stmt_info
)
6791 != STMT_SLP_TYPE (vinfo_for_stmt
6792 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6794 if (dump_enabled_p ())
6795 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6796 "conflicting SLP types for CSEd load\n");
6801 vect_memory_access_type memory_access_type
;
6802 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6803 &memory_access_type
, &gs_info
))
6806 if (!vec_stmt
) /* transformation not required. */
6809 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6810 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6811 /* The SLP costs are calculated during SLP analysis. */
6812 if (!PURE_SLP_STMT (stmt_info
))
6813 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6819 gcc_assert (memory_access_type
6820 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6822 if (dump_enabled_p ())
6823 dump_printf_loc (MSG_NOTE
, vect_location
,
6824 "transform load. ncopies = %d\n", ncopies
);
6828 ensure_base_align (dr
);
6830 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6832 tree vec_oprnd0
= NULL_TREE
, op
;
6833 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6834 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6835 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6836 edge pe
= loop_preheader_edge (loop
);
6839 enum { NARROW
, NONE
, WIDEN
} modifier
;
6840 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6842 if (nunits
== gather_off_nunits
)
6844 else if (nunits
== gather_off_nunits
/ 2)
6848 auto_vec_perm_indices
sel (gather_off_nunits
);
6849 for (i
= 0; i
< gather_off_nunits
; ++i
)
6850 sel
.quick_push (i
| nunits
);
6852 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6854 else if (nunits
== gather_off_nunits
* 2)
6858 auto_vec_perm_indices
sel (nunits
);
6859 for (i
= 0; i
< nunits
; ++i
)
6860 sel
.quick_push (i
< gather_off_nunits
6861 ? i
: i
+ nunits
- gather_off_nunits
);
6863 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6869 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6870 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6871 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6872 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6873 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6874 scaletype
= TREE_VALUE (arglist
);
6875 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6877 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6879 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6880 if (!is_gimple_min_invariant (ptr
))
6882 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6883 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6884 gcc_assert (!new_bb
);
6887 /* Currently we support only unconditional gather loads,
6888 so mask should be all ones. */
6889 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6890 mask
= build_int_cst (masktype
, -1);
6891 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6893 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6894 mask
= build_vector_from_val (masktype
, mask
);
6895 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6897 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6901 for (j
= 0; j
< 6; ++j
)
6903 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6904 mask
= build_real (TREE_TYPE (masktype
), r
);
6905 mask
= build_vector_from_val (masktype
, mask
);
6906 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6911 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6913 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6914 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6915 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6919 for (j
= 0; j
< 6; ++j
)
6921 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6922 merge
= build_real (TREE_TYPE (rettype
), r
);
6926 merge
= build_vector_from_val (rettype
, merge
);
6927 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6929 prev_stmt_info
= NULL
;
6930 for (j
= 0; j
< ncopies
; ++j
)
6932 if (modifier
== WIDEN
&& (j
& 1))
6933 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6934 perm_mask
, stmt
, gsi
);
6937 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6940 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6942 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6944 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6945 == TYPE_VECTOR_SUBPARTS (idxtype
));
6946 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6947 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6949 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6950 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6955 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6957 if (!useless_type_conversion_p (vectype
, rettype
))
6959 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6960 == TYPE_VECTOR_SUBPARTS (rettype
));
6961 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6962 gimple_call_set_lhs (new_stmt
, op
);
6963 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6964 var
= make_ssa_name (vec_dest
);
6965 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6967 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6971 var
= make_ssa_name (vec_dest
, new_stmt
);
6972 gimple_call_set_lhs (new_stmt
, var
);
6975 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6977 if (modifier
== NARROW
)
6984 var
= permute_vec_elements (prev_res
, var
,
6985 perm_mask
, stmt
, gsi
);
6986 new_stmt
= SSA_NAME_DEF_STMT (var
);
6989 if (prev_stmt_info
== NULL
)
6990 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6992 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6993 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6998 if (memory_access_type
== VMAT_ELEMENTWISE
6999 || memory_access_type
== VMAT_STRIDED_SLP
)
7001 gimple_stmt_iterator incr_gsi
;
7007 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7008 gimple_seq stmts
= NULL
;
7009 tree stride_base
, stride_step
, alias_off
;
7011 gcc_assert (!nested_in_vect_loop
);
7013 if (slp
&& grouped_load
)
7015 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7016 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7017 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7018 ref_type
= get_group_alias_ptr_type (first_stmt
);
7025 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7029 = fold_build_pointer_plus
7030 (DR_BASE_ADDRESS (first_dr
),
7031 size_binop (PLUS_EXPR
,
7032 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7033 convert_to_ptrofftype (DR_INIT (first_dr
))));
7034 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7036 /* For a load with loop-invariant (but other than power-of-2)
7037 stride (i.e. not a grouped access) like so:
7039 for (i = 0; i < n; i += stride)
7042 we generate a new induction variable and new accesses to
7043 form a new vector (or vectors, depending on ncopies):
7045 for (j = 0; ; j += VF*stride)
7047 tmp2 = array[j + stride];
7049 vectemp = {tmp1, tmp2, ...}
7052 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7053 build_int_cst (TREE_TYPE (stride_step
), vf
));
7055 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7057 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7058 loop
, &incr_gsi
, insert_after
,
7060 incr
= gsi_stmt (incr_gsi
);
7061 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7063 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7064 &stmts
, true, NULL_TREE
);
7066 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7068 prev_stmt_info
= NULL
;
7069 running_off
= offvar
;
7070 alias_off
= build_int_cst (ref_type
, 0);
7071 int nloads
= nunits
;
7073 tree ltype
= TREE_TYPE (vectype
);
7074 tree lvectype
= vectype
;
7075 auto_vec
<tree
> dr_chain
;
7076 if (memory_access_type
== VMAT_STRIDED_SLP
)
7078 if (group_size
< nunits
)
7080 /* First check if vec_init optab supports construction from
7081 vector elts directly. */
7082 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7084 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7085 && VECTOR_MODE_P (vmode
)
7086 && (convert_optab_handler (vec_init_optab
,
7087 TYPE_MODE (vectype
), vmode
)
7088 != CODE_FOR_nothing
))
7090 nloads
= nunits
/ group_size
;
7092 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7096 /* Otherwise avoid emitting a constructor of vector elements
7097 by performing the loads using an integer type of the same
7098 size, constructing a vector of those and then
7099 re-interpreting it as the original vector type.
7100 This avoids a huge runtime penalty due to the general
7101 inability to perform store forwarding from smaller stores
7102 to a larger load. */
7104 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7105 elmode
= int_mode_for_size (lsize
, 0).require ();
7106 /* If we can't construct such a vector fall back to
7107 element loads of the original vector type. */
7108 if (mode_for_vector (elmode
,
7109 nunits
/ group_size
).exists (&vmode
)
7110 && VECTOR_MODE_P (vmode
)
7111 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7112 != CODE_FOR_nothing
))
7114 nloads
= nunits
/ group_size
;
7116 ltype
= build_nonstandard_integer_type (lsize
, 1);
7117 lvectype
= build_vector_type (ltype
, nloads
);
7127 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7131 /* For SLP permutation support we need to load the whole group,
7132 not only the number of vector stmts the permutation result
7136 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7137 dr_chain
.create (ncopies
);
7140 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7143 unsigned HOST_WIDE_INT
7144 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7145 for (j
= 0; j
< ncopies
; j
++)
7148 vec_alloc (v
, nloads
);
7149 for (i
= 0; i
< nloads
; i
++)
7151 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7153 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7154 build2 (MEM_REF
, ltype
,
7155 running_off
, this_off
));
7156 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7158 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7159 gimple_assign_lhs (new_stmt
));
7163 || group_el
== group_size
)
7165 tree newoff
= copy_ssa_name (running_off
);
7166 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7167 running_off
, stride_step
);
7168 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7170 running_off
= newoff
;
7176 tree vec_inv
= build_constructor (lvectype
, v
);
7177 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7178 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7179 if (lvectype
!= vectype
)
7181 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7183 build1 (VIEW_CONVERT_EXPR
,
7184 vectype
, new_temp
));
7185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7192 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7194 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7199 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7201 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7202 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7208 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7209 slp_node_instance
, false, &n_perms
);
7216 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7217 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7218 /* For SLP vectorization we directly vectorize a subchain
7219 without permutation. */
7220 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7221 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7222 /* For BB vectorization always use the first stmt to base
7223 the data ref pointer on. */
7225 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7227 /* Check if the chain of loads is already vectorized. */
7228 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7229 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7230 ??? But we can only do so if there is exactly one
7231 as we have no way to get at the rest. Leave the CSE
7233 ??? With the group load eventually participating
7234 in multiple different permutations (having multiple
7235 slp nodes which refer to the same group) the CSE
7236 is even wrong code. See PR56270. */
7239 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7242 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7245 /* VEC_NUM is the number of vect stmts to be created for this group. */
7248 grouped_load
= false;
7249 /* For SLP permutation support we need to load the whole group,
7250 not only the number of vector stmts the permutation result
7254 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7255 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7259 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7261 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7265 vec_num
= group_size
;
7267 ref_type
= get_group_alias_ptr_type (first_stmt
);
7273 group_size
= vec_num
= 1;
7275 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7278 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7279 gcc_assert (alignment_support_scheme
);
7280 /* Targets with load-lane instructions must not require explicit
7282 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7283 || alignment_support_scheme
== dr_aligned
7284 || alignment_support_scheme
== dr_unaligned_supported
);
7286 /* In case the vectorization factor (VF) is bigger than the number
7287 of elements that we can fit in a vectype (nunits), we have to generate
7288 more than one vector stmt - i.e - we need to "unroll" the
7289 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7290 from one copy of the vector stmt to the next, in the field
7291 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7292 stages to find the correct vector defs to be used when vectorizing
7293 stmts that use the defs of the current stmt. The example below
7294 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7295 need to create 4 vectorized stmts):
7297 before vectorization:
7298 RELATED_STMT VEC_STMT
7302 step 1: vectorize stmt S1:
7303 We first create the vector stmt VS1_0, and, as usual, record a
7304 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7305 Next, we create the vector stmt VS1_1, and record a pointer to
7306 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7307 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7309 RELATED_STMT VEC_STMT
7310 VS1_0: vx0 = memref0 VS1_1 -
7311 VS1_1: vx1 = memref1 VS1_2 -
7312 VS1_2: vx2 = memref2 VS1_3 -
7313 VS1_3: vx3 = memref3 - -
7314 S1: x = load - VS1_0
7317 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7318 information we recorded in RELATED_STMT field is used to vectorize
7321 /* In case of interleaving (non-unit grouped access):
7328 Vectorized loads are created in the order of memory accesses
7329 starting from the access of the first stmt of the chain:
7332 VS2: vx1 = &base + vec_size*1
7333 VS3: vx3 = &base + vec_size*2
7334 VS4: vx4 = &base + vec_size*3
7336 Then permutation statements are generated:
7338 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7339 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7342 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7343 (the order of the data-refs in the output of vect_permute_load_chain
7344 corresponds to the order of scalar stmts in the interleaving chain - see
7345 the documentation of vect_permute_load_chain()).
7346 The generation of permutation stmts and recording them in
7347 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7349 In case of both multiple types and interleaving, the vector loads and
7350 permutation stmts above are created for every copy. The result vector
7351 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7352 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7354 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7355 on a target that supports unaligned accesses (dr_unaligned_supported)
7356 we generate the following code:
7360 p = p + indx * vectype_size;
7365 Otherwise, the data reference is potentially unaligned on a target that
7366 does not support unaligned accesses (dr_explicit_realign_optimized) -
7367 then generate the following code, in which the data in each iteration is
7368 obtained by two vector loads, one from the previous iteration, and one
7369 from the current iteration:
7371 msq_init = *(floor(p1))
7372 p2 = initial_addr + VS - 1;
7373 realignment_token = call target_builtin;
7376 p2 = p2 + indx * vectype_size
7378 vec_dest = realign_load (msq, lsq, realignment_token)
7383 /* If the misalignment remains the same throughout the execution of the
7384 loop, we can create the init_addr and permutation mask at the loop
7385 preheader. Otherwise, it needs to be created inside the loop.
7386 This can only occur when vectorizing memory accesses in the inner-loop
7387 nested within an outer-loop that is being vectorized. */
7389 if (nested_in_vect_loop
7390 && (DR_STEP_ALIGNMENT (dr
) % GET_MODE_SIZE (TYPE_MODE (vectype
))) != 0)
7392 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7393 compute_in_loop
= true;
7396 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7397 || alignment_support_scheme
== dr_explicit_realign
)
7398 && !compute_in_loop
)
7400 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7401 alignment_support_scheme
, NULL_TREE
,
7403 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7405 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7406 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7413 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7414 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7416 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7417 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7419 aggr_type
= vectype
;
7421 prev_stmt_info
= NULL
;
7423 for (j
= 0; j
< ncopies
; j
++)
7425 /* 1. Create the vector or array pointer update chain. */
7428 bool simd_lane_access_p
7429 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7430 if (simd_lane_access_p
7431 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7432 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7433 && integer_zerop (DR_OFFSET (first_dr
))
7434 && integer_zerop (DR_INIT (first_dr
))
7435 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7436 get_alias_set (TREE_TYPE (ref_type
)))
7437 && (alignment_support_scheme
== dr_aligned
7438 || alignment_support_scheme
== dr_unaligned_supported
))
7440 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7441 dataref_offset
= build_int_cst (ref_type
, 0);
7444 else if (first_stmt_for_drptr
7445 && first_stmt
!= first_stmt_for_drptr
)
7448 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7449 at_loop
, offset
, &dummy
, gsi
,
7450 &ptr_incr
, simd_lane_access_p
,
7451 &inv_p
, byte_offset
);
7452 /* Adjust the pointer by the difference to first_stmt. */
7453 data_reference_p ptrdr
7454 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7455 tree diff
= fold_convert (sizetype
,
7456 size_binop (MINUS_EXPR
,
7459 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7464 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7465 offset
, &dummy
, gsi
, &ptr_incr
,
7466 simd_lane_access_p
, &inv_p
,
7469 else if (dataref_offset
)
7470 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7471 TYPE_SIZE_UNIT (aggr_type
));
7473 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7474 TYPE_SIZE_UNIT (aggr_type
));
7476 if (grouped_load
|| slp_perm
)
7477 dr_chain
.create (vec_num
);
7479 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7483 vec_array
= create_vector_array (vectype
, vec_num
);
7486 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7487 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7488 gcall
*call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1,
7490 gimple_call_set_lhs (call
, vec_array
);
7491 gimple_call_set_nothrow (call
, true);
7493 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7495 /* Extract each vector into an SSA_NAME. */
7496 for (i
= 0; i
< vec_num
; i
++)
7498 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7500 dr_chain
.quick_push (new_temp
);
7503 /* Record the mapping between SSA_NAMEs and statements. */
7504 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7508 for (i
= 0; i
< vec_num
; i
++)
7511 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7514 /* 2. Create the vector-load in the loop. */
7515 switch (alignment_support_scheme
)
7518 case dr_unaligned_supported
:
7520 unsigned int align
, misalign
;
7523 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7526 : build_int_cst (ref_type
, 0));
7527 align
= DR_TARGET_ALIGNMENT (dr
);
7528 if (alignment_support_scheme
== dr_aligned
)
7530 gcc_assert (aligned_access_p (first_dr
));
7533 else if (DR_MISALIGNMENT (first_dr
) == -1)
7535 align
= dr_alignment (vect_dr_behavior (first_dr
));
7537 TREE_TYPE (data_ref
)
7538 = build_aligned_type (TREE_TYPE (data_ref
),
7539 align
* BITS_PER_UNIT
);
7543 TREE_TYPE (data_ref
)
7544 = build_aligned_type (TREE_TYPE (data_ref
),
7545 TYPE_ALIGN (elem_type
));
7546 misalign
= DR_MISALIGNMENT (first_dr
);
7548 if (dataref_offset
== NULL_TREE
7549 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7550 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7554 case dr_explicit_realign
:
7558 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7560 if (compute_in_loop
)
7561 msq
= vect_setup_realignment (first_stmt
, gsi
,
7563 dr_explicit_realign
,
7566 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7567 ptr
= copy_ssa_name (dataref_ptr
);
7569 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7570 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7571 new_stmt
= gimple_build_assign
7572 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7574 (TREE_TYPE (dataref_ptr
),
7575 -(HOST_WIDE_INT
) align
));
7576 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7578 = build2 (MEM_REF
, vectype
, ptr
,
7579 build_int_cst (ref_type
, 0));
7580 vec_dest
= vect_create_destination_var (scalar_dest
,
7582 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7583 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7584 gimple_assign_set_lhs (new_stmt
, new_temp
);
7585 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7586 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7587 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7590 bump
= size_binop (MULT_EXPR
, vs
,
7591 TYPE_SIZE_UNIT (elem_type
));
7592 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7593 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7594 new_stmt
= gimple_build_assign
7595 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7597 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7598 ptr
= copy_ssa_name (ptr
, new_stmt
);
7599 gimple_assign_set_lhs (new_stmt
, ptr
);
7600 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7602 = build2 (MEM_REF
, vectype
, ptr
,
7603 build_int_cst (ref_type
, 0));
7606 case dr_explicit_realign_optimized
:
7608 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7609 new_temp
= copy_ssa_name (dataref_ptr
);
7611 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7612 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7613 new_stmt
= gimple_build_assign
7614 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7615 build_int_cst (TREE_TYPE (dataref_ptr
),
7616 -(HOST_WIDE_INT
) align
));
7617 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7619 = build2 (MEM_REF
, vectype
, new_temp
,
7620 build_int_cst (ref_type
, 0));
7626 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7627 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7628 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7629 gimple_assign_set_lhs (new_stmt
, new_temp
);
7630 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7632 /* 3. Handle explicit realignment if necessary/supported.
7634 vec_dest = realign_load (msq, lsq, realignment_token) */
7635 if (alignment_support_scheme
== dr_explicit_realign_optimized
7636 || alignment_support_scheme
== dr_explicit_realign
)
7638 lsq
= gimple_assign_lhs (new_stmt
);
7639 if (!realignment_token
)
7640 realignment_token
= dataref_ptr
;
7641 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7642 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7643 msq
, lsq
, realignment_token
);
7644 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7645 gimple_assign_set_lhs (new_stmt
, new_temp
);
7646 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7648 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7651 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7652 add_phi_arg (phi
, lsq
,
7653 loop_latch_edge (containing_loop
),
7659 /* 4. Handle invariant-load. */
7660 if (inv_p
&& !bb_vinfo
)
7662 gcc_assert (!grouped_load
);
7663 /* If we have versioned for aliasing or the loop doesn't
7664 have any data dependencies that would preclude this,
7665 then we are sure this is a loop invariant load and
7666 thus we can insert it on the preheader edge. */
7667 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7668 && !nested_in_vect_loop
7669 && hoist_defs_of_uses (stmt
, loop
))
7671 if (dump_enabled_p ())
7673 dump_printf_loc (MSG_NOTE
, vect_location
,
7674 "hoisting out of the vectorized "
7676 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7678 tree tem
= copy_ssa_name (scalar_dest
);
7679 gsi_insert_on_edge_immediate
7680 (loop_preheader_edge (loop
),
7681 gimple_build_assign (tem
,
7683 (gimple_assign_rhs1 (stmt
))));
7684 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7685 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7686 set_vinfo_for_stmt (new_stmt
,
7687 new_stmt_vec_info (new_stmt
, vinfo
));
7691 gimple_stmt_iterator gsi2
= *gsi
;
7693 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7695 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7699 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7701 tree perm_mask
= perm_mask_for_reverse (vectype
);
7702 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7703 perm_mask
, stmt
, gsi
);
7704 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7707 /* Collect vector loads and later create their permutation in
7708 vect_transform_grouped_load (). */
7709 if (grouped_load
|| slp_perm
)
7710 dr_chain
.quick_push (new_temp
);
7712 /* Store vector loads in the corresponding SLP_NODE. */
7713 if (slp
&& !slp_perm
)
7714 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7716 /* With SLP permutation we load the gaps as well, without
7717 we need to skip the gaps after we manage to fully load
7718 all elements. group_gap_adj is GROUP_SIZE here. */
7719 group_elt
+= nunits
;
7720 if (group_gap_adj
!= 0 && ! slp_perm
7721 && group_elt
== group_size
- group_gap_adj
)
7723 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7725 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7726 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7731 /* Bump the vector pointer to account for a gap or for excess
7732 elements loaded for a permuted SLP load. */
7733 if (group_gap_adj
!= 0 && slp_perm
)
7735 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7737 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7738 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7743 if (slp
&& !slp_perm
)
7749 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7750 slp_node_instance
, false,
7753 dr_chain
.release ();
7761 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7762 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7763 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7768 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7770 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7771 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7774 dr_chain
.release ();
7780 /* Function vect_is_simple_cond.
7783 LOOP - the loop that is being vectorized.
7784 COND - Condition that is checked for simple use.
7787 *COMP_VECTYPE - the vector type for the comparison.
7788 *DTS - The def types for the arguments of the comparison
7790 Returns whether a COND can be vectorized. Checks whether
7791 condition operands are supportable using vec_is_simple_use. */
7794 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
7795 tree
*comp_vectype
, enum vect_def_type
*dts
)
7798 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7801 if (TREE_CODE (cond
) == SSA_NAME
7802 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7804 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7805 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7806 &dts
[0], comp_vectype
)
7808 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7813 if (!COMPARISON_CLASS_P (cond
))
7816 lhs
= TREE_OPERAND (cond
, 0);
7817 rhs
= TREE_OPERAND (cond
, 1);
7819 if (TREE_CODE (lhs
) == SSA_NAME
)
7821 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7822 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
7825 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
7826 || TREE_CODE (lhs
) == FIXED_CST
)
7827 dts
[0] = vect_constant_def
;
7831 if (TREE_CODE (rhs
) == SSA_NAME
)
7833 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7834 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
7837 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
7838 || TREE_CODE (rhs
) == FIXED_CST
)
7839 dts
[1] = vect_constant_def
;
7843 if (vectype1
&& vectype2
7844 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7847 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7851 /* vectorizable_condition.
7853 Check if STMT is conditional modify expression that can be vectorized.
7854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7855 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7858 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7859 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7860 else clause if it is 2).
7862 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7865 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7866 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7869 tree scalar_dest
= NULL_TREE
;
7870 tree vec_dest
= NULL_TREE
;
7871 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7872 tree then_clause
, else_clause
;
7873 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7874 tree comp_vectype
= NULL_TREE
;
7875 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7876 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7879 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7880 enum vect_def_type dts
[4]
7881 = {vect_unknown_def_type
, vect_unknown_def_type
,
7882 vect_unknown_def_type
, vect_unknown_def_type
};
7885 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7886 stmt_vec_info prev_stmt_info
= NULL
;
7888 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7889 vec
<tree
> vec_oprnds0
= vNULL
;
7890 vec
<tree
> vec_oprnds1
= vNULL
;
7891 vec
<tree
> vec_oprnds2
= vNULL
;
7892 vec
<tree
> vec_oprnds3
= vNULL
;
7894 bool masked
= false;
7896 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7899 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7901 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7904 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7905 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7909 /* FORNOW: not yet supported. */
7910 if (STMT_VINFO_LIVE_P (stmt_info
))
7912 if (dump_enabled_p ())
7913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7914 "value used after loop.\n");
7919 /* Is vectorizable conditional operation? */
7920 if (!is_gimple_assign (stmt
))
7923 code
= gimple_assign_rhs_code (stmt
);
7925 if (code
!= COND_EXPR
)
7928 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7929 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7934 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7936 gcc_assert (ncopies
>= 1);
7937 if (reduc_index
&& ncopies
> 1)
7938 return false; /* FORNOW */
7940 cond_expr
= gimple_assign_rhs1 (stmt
);
7941 then_clause
= gimple_assign_rhs2 (stmt
);
7942 else_clause
= gimple_assign_rhs3 (stmt
);
7944 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
7945 &comp_vectype
, &dts
[0])
7950 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
7953 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
7957 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7960 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7963 masked
= !COMPARISON_CLASS_P (cond_expr
);
7964 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7966 if (vec_cmp_type
== NULL_TREE
)
7969 cond_code
= TREE_CODE (cond_expr
);
7972 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
7973 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
7976 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7978 /* Boolean values may have another representation in vectors
7979 and therefore we prefer bit operations over comparison for
7980 them (which also works for scalar masks). We store opcodes
7981 to use in bitop1 and bitop2. Statement is vectorized as
7982 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7983 depending on bitop1 and bitop2 arity. */
7987 bitop1
= BIT_NOT_EXPR
;
7988 bitop2
= BIT_AND_EXPR
;
7991 bitop1
= BIT_NOT_EXPR
;
7992 bitop2
= BIT_IOR_EXPR
;
7995 bitop1
= BIT_NOT_EXPR
;
7996 bitop2
= BIT_AND_EXPR
;
7997 std::swap (cond_expr0
, cond_expr1
);
8000 bitop1
= BIT_NOT_EXPR
;
8001 bitop2
= BIT_IOR_EXPR
;
8002 std::swap (cond_expr0
, cond_expr1
);
8005 bitop1
= BIT_XOR_EXPR
;
8008 bitop1
= BIT_XOR_EXPR
;
8009 bitop2
= BIT_NOT_EXPR
;
8014 cond_code
= SSA_NAME
;
8019 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8020 if (bitop1
!= NOP_EXPR
)
8022 machine_mode mode
= TYPE_MODE (comp_vectype
);
8025 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8026 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8029 if (bitop2
!= NOP_EXPR
)
8031 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8033 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8037 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8040 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
8050 vec_oprnds0
.create (1);
8051 vec_oprnds1
.create (1);
8052 vec_oprnds2
.create (1);
8053 vec_oprnds3
.create (1);
8057 scalar_dest
= gimple_assign_lhs (stmt
);
8058 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8060 /* Handle cond expr. */
8061 for (j
= 0; j
< ncopies
; j
++)
8063 gassign
*new_stmt
= NULL
;
8068 auto_vec
<tree
, 4> ops
;
8069 auto_vec
<vec
<tree
>, 4> vec_defs
;
8072 ops
.safe_push (cond_expr
);
8075 ops
.safe_push (cond_expr0
);
8076 ops
.safe_push (cond_expr1
);
8078 ops
.safe_push (then_clause
);
8079 ops
.safe_push (else_clause
);
8080 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8081 vec_oprnds3
= vec_defs
.pop ();
8082 vec_oprnds2
= vec_defs
.pop ();
8084 vec_oprnds1
= vec_defs
.pop ();
8085 vec_oprnds0
= vec_defs
.pop ();
8093 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8095 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8101 = vect_get_vec_def_for_operand (cond_expr0
,
8102 stmt
, comp_vectype
);
8103 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8106 = vect_get_vec_def_for_operand (cond_expr1
,
8107 stmt
, comp_vectype
);
8108 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8110 if (reduc_index
== 1)
8111 vec_then_clause
= reduc_def
;
8114 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8116 vect_is_simple_use (then_clause
, loop_vinfo
,
8119 if (reduc_index
== 2)
8120 vec_else_clause
= reduc_def
;
8123 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8125 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8132 = vect_get_vec_def_for_stmt_copy (dts
[0],
8133 vec_oprnds0
.pop ());
8136 = vect_get_vec_def_for_stmt_copy (dts
[1],
8137 vec_oprnds1
.pop ());
8139 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8140 vec_oprnds2
.pop ());
8141 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8142 vec_oprnds3
.pop ());
8147 vec_oprnds0
.quick_push (vec_cond_lhs
);
8149 vec_oprnds1
.quick_push (vec_cond_rhs
);
8150 vec_oprnds2
.quick_push (vec_then_clause
);
8151 vec_oprnds3
.quick_push (vec_else_clause
);
8154 /* Arguments are ready. Create the new vector stmt. */
8155 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8157 vec_then_clause
= vec_oprnds2
[i
];
8158 vec_else_clause
= vec_oprnds3
[i
];
8161 vec_compare
= vec_cond_lhs
;
8164 vec_cond_rhs
= vec_oprnds1
[i
];
8165 if (bitop1
== NOP_EXPR
)
8166 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8167 vec_cond_lhs
, vec_cond_rhs
);
8170 new_temp
= make_ssa_name (vec_cmp_type
);
8171 if (bitop1
== BIT_NOT_EXPR
)
8172 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8176 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8178 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8179 if (bitop2
== NOP_EXPR
)
8180 vec_compare
= new_temp
;
8181 else if (bitop2
== BIT_NOT_EXPR
)
8183 /* Instead of doing ~x ? y : z do x ? z : y. */
8184 vec_compare
= new_temp
;
8185 std::swap (vec_then_clause
, vec_else_clause
);
8189 vec_compare
= make_ssa_name (vec_cmp_type
);
8191 = gimple_build_assign (vec_compare
, bitop2
,
8192 vec_cond_lhs
, new_temp
);
8193 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8197 new_temp
= make_ssa_name (vec_dest
);
8198 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8199 vec_compare
, vec_then_clause
,
8201 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8203 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8210 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8212 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8214 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8217 vec_oprnds0
.release ();
8218 vec_oprnds1
.release ();
8219 vec_oprnds2
.release ();
8220 vec_oprnds3
.release ();
8225 /* vectorizable_comparison.
8227 Check if STMT is comparison expression that can be vectorized.
8228 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8229 comparison, put it in VEC_STMT, and insert it at GSI.
8231 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8234 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8235 gimple
**vec_stmt
, tree reduc_def
,
8238 tree lhs
, rhs1
, rhs2
;
8239 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8240 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8241 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8242 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8244 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8245 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8249 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8250 stmt_vec_info prev_stmt_info
= NULL
;
8252 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8253 vec
<tree
> vec_oprnds0
= vNULL
;
8254 vec
<tree
> vec_oprnds1
= vNULL
;
8259 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8262 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8265 mask_type
= vectype
;
8266 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8271 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8273 gcc_assert (ncopies
>= 1);
8274 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8275 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8279 if (STMT_VINFO_LIVE_P (stmt_info
))
8281 if (dump_enabled_p ())
8282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8283 "value used after loop.\n");
8287 if (!is_gimple_assign (stmt
))
8290 code
= gimple_assign_rhs_code (stmt
);
8292 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8295 rhs1
= gimple_assign_rhs1 (stmt
);
8296 rhs2
= gimple_assign_rhs2 (stmt
);
8298 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8299 &dts
[0], &vectype1
))
8302 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8303 &dts
[1], &vectype2
))
8306 if (vectype1
&& vectype2
8307 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
8310 vectype
= vectype1
? vectype1
: vectype2
;
8312 /* Invariant comparison. */
8315 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8316 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8319 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8322 /* Can't compare mask and non-mask types. */
8323 if (vectype1
&& vectype2
8324 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8327 /* Boolean values may have another representation in vectors
8328 and therefore we prefer bit operations over comparison for
8329 them (which also works for scalar masks). We store opcodes
8330 to use in bitop1 and bitop2. Statement is vectorized as
8331 BITOP2 (rhs1 BITOP1 rhs2) or
8332 rhs1 BITOP2 (BITOP1 rhs2)
8333 depending on bitop1 and bitop2 arity. */
8334 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
8336 if (code
== GT_EXPR
)
8338 bitop1
= BIT_NOT_EXPR
;
8339 bitop2
= BIT_AND_EXPR
;
8341 else if (code
== GE_EXPR
)
8343 bitop1
= BIT_NOT_EXPR
;
8344 bitop2
= BIT_IOR_EXPR
;
8346 else if (code
== LT_EXPR
)
8348 bitop1
= BIT_NOT_EXPR
;
8349 bitop2
= BIT_AND_EXPR
;
8350 std::swap (rhs1
, rhs2
);
8351 std::swap (dts
[0], dts
[1]);
8353 else if (code
== LE_EXPR
)
8355 bitop1
= BIT_NOT_EXPR
;
8356 bitop2
= BIT_IOR_EXPR
;
8357 std::swap (rhs1
, rhs2
);
8358 std::swap (dts
[0], dts
[1]);
8362 bitop1
= BIT_XOR_EXPR
;
8363 if (code
== EQ_EXPR
)
8364 bitop2
= BIT_NOT_EXPR
;
8370 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
8371 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
8372 dts
, ndts
, NULL
, NULL
);
8373 if (bitop1
== NOP_EXPR
)
8374 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
8377 machine_mode mode
= TYPE_MODE (vectype
);
8380 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
8381 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8384 if (bitop2
!= NOP_EXPR
)
8386 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8387 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8397 vec_oprnds0
.create (1);
8398 vec_oprnds1
.create (1);
8402 lhs
= gimple_assign_lhs (stmt
);
8403 mask
= vect_create_destination_var (lhs
, mask_type
);
8405 /* Handle cmp expr. */
8406 for (j
= 0; j
< ncopies
; j
++)
8408 gassign
*new_stmt
= NULL
;
8413 auto_vec
<tree
, 2> ops
;
8414 auto_vec
<vec
<tree
>, 2> vec_defs
;
8416 ops
.safe_push (rhs1
);
8417 ops
.safe_push (rhs2
);
8418 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8419 vec_oprnds1
= vec_defs
.pop ();
8420 vec_oprnds0
= vec_defs
.pop ();
8424 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8425 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8430 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8431 vec_oprnds0
.pop ());
8432 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8433 vec_oprnds1
.pop ());
8438 vec_oprnds0
.quick_push (vec_rhs1
);
8439 vec_oprnds1
.quick_push (vec_rhs2
);
8442 /* Arguments are ready. Create the new vector stmt. */
8443 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8445 vec_rhs2
= vec_oprnds1
[i
];
8447 new_temp
= make_ssa_name (mask
);
8448 if (bitop1
== NOP_EXPR
)
8450 new_stmt
= gimple_build_assign (new_temp
, code
,
8451 vec_rhs1
, vec_rhs2
);
8452 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8456 if (bitop1
== BIT_NOT_EXPR
)
8457 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8459 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8461 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8462 if (bitop2
!= NOP_EXPR
)
8464 tree res
= make_ssa_name (mask
);
8465 if (bitop2
== BIT_NOT_EXPR
)
8466 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8468 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8470 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8474 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8481 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8483 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8485 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8488 vec_oprnds0
.release ();
8489 vec_oprnds1
.release ();
8494 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8495 can handle all live statements in the node. Otherwise return true
8496 if STMT is not live or if vectorizable_live_operation can handle it.
8497 GSI and VEC_STMT are as for vectorizable_live_operation. */
8500 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8501 slp_tree slp_node
, gimple
**vec_stmt
)
8507 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8509 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8510 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8511 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8516 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
8517 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
))
8523 /* Make sure the statement is vectorizable. */
8526 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
8527 slp_instance node_instance
)
8529 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8530 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8531 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8533 gimple
*pattern_stmt
;
8534 gimple_seq pattern_def_seq
;
8536 if (dump_enabled_p ())
8538 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8539 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8542 if (gimple_has_volatile_ops (stmt
))
8544 if (dump_enabled_p ())
8545 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8546 "not vectorized: stmt has volatile operands\n");
8551 /* Skip stmts that do not need to be vectorized. In loops this is expected
8553 - the COND_EXPR which is the loop exit condition
8554 - any LABEL_EXPRs in the loop
8555 - computations that are used only for array indexing or loop control.
8556 In basic blocks we only analyze statements that are a part of some SLP
8557 instance, therefore, all the statements are relevant.
8559 Pattern statement needs to be analyzed instead of the original statement
8560 if the original statement is not relevant. Otherwise, we analyze both
8561 statements. In basic blocks we are called from some SLP instance
8562 traversal, don't analyze pattern stmts instead, the pattern stmts
8563 already will be part of SLP instance. */
8565 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8566 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8567 && !STMT_VINFO_LIVE_P (stmt_info
))
8569 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8571 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8572 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8574 /* Analyze PATTERN_STMT instead of the original stmt. */
8575 stmt
= pattern_stmt
;
8576 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8577 if (dump_enabled_p ())
8579 dump_printf_loc (MSG_NOTE
, vect_location
,
8580 "==> examining pattern statement: ");
8581 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8586 if (dump_enabled_p ())
8587 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8592 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8595 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8596 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8598 /* Analyze PATTERN_STMT too. */
8599 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_NOTE
, vect_location
,
8602 "==> examining pattern statement: ");
8603 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8606 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
8611 if (is_pattern_stmt_p (stmt_info
)
8613 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8615 gimple_stmt_iterator si
;
8617 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8619 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8620 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8621 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8623 /* Analyze def stmt of STMT if it's a pattern stmt. */
8624 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_NOTE
, vect_location
,
8627 "==> examining pattern def statement: ");
8628 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8631 if (!vect_analyze_stmt (pattern_def_stmt
,
8632 need_to_vectorize
, node
, node_instance
))
8638 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8640 case vect_internal_def
:
8643 case vect_reduction_def
:
8644 case vect_nested_cycle
:
8645 gcc_assert (!bb_vinfo
8646 && (relevance
== vect_used_in_outer
8647 || relevance
== vect_used_in_outer_by_reduction
8648 || relevance
== vect_used_by_reduction
8649 || relevance
== vect_unused_in_scope
8650 || relevance
== vect_used_only_live
));
8653 case vect_induction_def
:
8654 gcc_assert (!bb_vinfo
);
8657 case vect_constant_def
:
8658 case vect_external_def
:
8659 case vect_unknown_def_type
:
8664 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8666 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8667 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8668 || (is_gimple_call (stmt
)
8669 && gimple_call_lhs (stmt
) == NULL_TREE
));
8670 *need_to_vectorize
= true;
8673 if (PURE_SLP_STMT (stmt_info
) && !node
)
8675 dump_printf_loc (MSG_NOTE
, vect_location
,
8676 "handled only by SLP analysis\n");
8682 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8683 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8684 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8685 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8686 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8687 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8688 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8689 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8690 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8691 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8692 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
)
8693 || vectorizable_induction (stmt
, NULL
, NULL
, node
)
8694 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8695 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8699 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8700 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8701 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8702 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8703 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8704 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8705 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8706 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8707 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8708 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8713 if (dump_enabled_p ())
8715 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8716 "not vectorized: relevant stmt not ");
8717 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8718 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8727 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8728 need extra handling, except for vectorizable reductions. */
8729 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8730 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
))
8732 if (dump_enabled_p ())
8734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8735 "not vectorized: live stmt not supported: ");
8736 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8746 /* Function vect_transform_stmt.
8748 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8751 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8752 bool *grouped_store
, slp_tree slp_node
,
8753 slp_instance slp_node_instance
)
8755 bool is_store
= false;
8756 gimple
*vec_stmt
= NULL
;
8757 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8760 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8761 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8763 switch (STMT_VINFO_TYPE (stmt_info
))
8765 case type_demotion_vec_info_type
:
8766 case type_promotion_vec_info_type
:
8767 case type_conversion_vec_info_type
:
8768 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8772 case induc_vec_info_type
:
8773 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
);
8777 case shift_vec_info_type
:
8778 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8782 case op_vec_info_type
:
8783 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8787 case assignment_vec_info_type
:
8788 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8792 case load_vec_info_type
:
8793 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8798 case store_vec_info_type
:
8799 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8801 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8803 /* In case of interleaving, the whole chain is vectorized when the
8804 last store in the chain is reached. Store stmts before the last
8805 one are skipped, and there vec_stmt_info shouldn't be freed
8807 *grouped_store
= true;
8808 if (STMT_VINFO_VEC_STMT (stmt_info
))
8815 case condition_vec_info_type
:
8816 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8820 case comparison_vec_info_type
:
8821 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8825 case call_vec_info_type
:
8826 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8827 stmt
= gsi_stmt (*gsi
);
8828 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
8832 case call_simd_clone_vec_info_type
:
8833 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8834 stmt
= gsi_stmt (*gsi
);
8837 case reduc_vec_info_type
:
8838 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
8844 if (!STMT_VINFO_LIVE_P (stmt_info
))
8846 if (dump_enabled_p ())
8847 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8848 "stmt not supported.\n");
8853 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8854 This would break hybrid SLP vectorization. */
8856 gcc_assert (!vec_stmt
8857 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8859 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8860 is being vectorized, but outside the immediately enclosing loop. */
8862 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8863 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8864 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8865 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8866 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8867 || STMT_VINFO_RELEVANT (stmt_info
) ==
8868 vect_used_in_outer_by_reduction
))
8870 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8871 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8872 imm_use_iterator imm_iter
;
8873 use_operand_p use_p
;
8877 if (dump_enabled_p ())
8878 dump_printf_loc (MSG_NOTE
, vect_location
,
8879 "Record the vdef for outer-loop vectorization.\n");
8881 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8882 (to be used when vectorizing outer-loop stmts that use the DEF of
8884 if (gimple_code (stmt
) == GIMPLE_PHI
)
8885 scalar_dest
= PHI_RESULT (stmt
);
8887 scalar_dest
= gimple_assign_lhs (stmt
);
8889 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8891 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8893 exit_phi
= USE_STMT (use_p
);
8894 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8899 /* Handle stmts whose DEF is used outside the loop-nest that is
8900 being vectorized. */
8901 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8903 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
);
8908 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8914 /* Remove a group of stores (for SLP or interleaving), free their
8918 vect_remove_stores (gimple
*first_stmt
)
8920 gimple
*next
= first_stmt
;
8922 gimple_stmt_iterator next_si
;
8926 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8928 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8929 if (is_pattern_stmt_p (stmt_info
))
8930 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8931 /* Free the attached stmt_vec_info and remove the stmt. */
8932 next_si
= gsi_for_stmt (next
);
8933 unlink_stmt_vdef (next
);
8934 gsi_remove (&next_si
, true);
8935 release_defs (next
);
8936 free_stmt_vec_info (next
);
8942 /* Function new_stmt_vec_info.
8944 Create and initialize a new stmt_vec_info struct for STMT. */
8947 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8950 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8952 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8953 STMT_VINFO_STMT (res
) = stmt
;
8955 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8956 STMT_VINFO_LIVE_P (res
) = false;
8957 STMT_VINFO_VECTYPE (res
) = NULL
;
8958 STMT_VINFO_VEC_STMT (res
) = NULL
;
8959 STMT_VINFO_VECTORIZABLE (res
) = true;
8960 STMT_VINFO_IN_PATTERN_P (res
) = false;
8961 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8962 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8963 STMT_VINFO_DATA_REF (res
) = NULL
;
8964 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8965 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
8967 if (gimple_code (stmt
) == GIMPLE_PHI
8968 && is_loop_header_bb_p (gimple_bb (stmt
)))
8969 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8971 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8973 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8974 STMT_SLP_TYPE (res
) = loop_vect
;
8975 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8977 GROUP_FIRST_ELEMENT (res
) = NULL
;
8978 GROUP_NEXT_ELEMENT (res
) = NULL
;
8979 GROUP_SIZE (res
) = 0;
8980 GROUP_STORE_COUNT (res
) = 0;
8981 GROUP_GAP (res
) = 0;
8982 GROUP_SAME_DR_STMT (res
) = NULL
;
8988 /* Create a hash table for stmt_vec_info. */
8991 init_stmt_vec_info_vec (void)
8993 gcc_assert (!stmt_vec_info_vec
.exists ());
8994 stmt_vec_info_vec
.create (50);
8998 /* Free hash table for stmt_vec_info. */
9001 free_stmt_vec_info_vec (void)
9005 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
9007 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9008 gcc_assert (stmt_vec_info_vec
.exists ());
9009 stmt_vec_info_vec
.release ();
9013 /* Free stmt vectorization related info. */
9016 free_stmt_vec_info (gimple
*stmt
)
9018 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9023 /* Check if this statement has a related "pattern stmt"
9024 (introduced by the vectorizer during the pattern recognition
9025 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9027 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9029 stmt_vec_info patt_info
9030 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9033 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
9034 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9035 gimple_set_bb (patt_stmt
, NULL
);
9036 tree lhs
= gimple_get_lhs (patt_stmt
);
9037 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9038 release_ssa_name (lhs
);
9041 gimple_stmt_iterator si
;
9042 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
9044 gimple
*seq_stmt
= gsi_stmt (si
);
9045 gimple_set_bb (seq_stmt
, NULL
);
9046 lhs
= gimple_get_lhs (seq_stmt
);
9047 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9048 release_ssa_name (lhs
);
9049 free_stmt_vec_info (seq_stmt
);
9052 free_stmt_vec_info (patt_stmt
);
9056 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9057 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9058 set_vinfo_for_stmt (stmt
, NULL
);
9063 /* Function get_vectype_for_scalar_type_and_size.
9065 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9069 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
9071 tree orig_scalar_type
= scalar_type
;
9072 scalar_mode inner_mode
;
9073 machine_mode simd_mode
;
9077 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9078 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9081 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9083 /* For vector types of elements whose mode precision doesn't
9084 match their types precision we use a element type of mode
9085 precision. The vectorization routines will have to make sure
9086 they support the proper result truncation/extension.
9087 We also make sure to build vector types with INTEGER_TYPE
9088 component type only. */
9089 if (INTEGRAL_TYPE_P (scalar_type
)
9090 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9091 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9092 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9093 TYPE_UNSIGNED (scalar_type
));
9095 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9096 When the component mode passes the above test simply use a type
9097 corresponding to that mode. The theory is that any use that
9098 would cause problems with this will disable vectorization anyway. */
9099 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9100 && !INTEGRAL_TYPE_P (scalar_type
))
9101 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9103 /* We can't build a vector type of elements with alignment bigger than
9105 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9106 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9107 TYPE_UNSIGNED (scalar_type
));
9109 /* If we felt back to using the mode fail if there was
9110 no scalar type for it. */
9111 if (scalar_type
== NULL_TREE
)
9114 /* If no size was supplied use the mode the target prefers. Otherwise
9115 lookup a vector mode of the specified size. */
9117 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9118 else if (!mode_for_vector (inner_mode
, size
/ nbytes
).exists (&simd_mode
))
9120 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
9121 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9125 vectype
= build_vector_type (scalar_type
, nunits
);
9127 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9128 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9131 /* Re-attach the address-space qualifier if we canonicalized the scalar
9133 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9134 return build_qualified_type
9135 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9140 unsigned int current_vector_size
;
9142 /* Function get_vectype_for_scalar_type.
9144 Returns the vector type corresponding to SCALAR_TYPE as supported
9148 get_vectype_for_scalar_type (tree scalar_type
)
9151 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9152 current_vector_size
);
9154 && current_vector_size
== 0)
9155 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9159 /* Function get_mask_type_for_scalar_type.
9161 Returns the mask type corresponding to a result of comparison
9162 of vectors of specified SCALAR_TYPE as supported by target. */
9165 get_mask_type_for_scalar_type (tree scalar_type
)
9167 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9172 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9173 current_vector_size
);
9176 /* Function get_same_sized_vectype
9178 Returns a vector type corresponding to SCALAR_TYPE of size
9179 VECTOR_TYPE if supported by the target. */
9182 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9184 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9185 return build_same_sized_truth_vector_type (vector_type
);
9187 return get_vectype_for_scalar_type_and_size
9188 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9191 /* Function vect_is_simple_use.
9194 VINFO - the vect info of the loop or basic block that is being vectorized.
9195 OPERAND - operand in the loop or bb.
9197 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9198 DT - the type of definition
9200 Returns whether a stmt with OPERAND can be vectorized.
9201 For loops, supportable operands are constants, loop invariants, and operands
9202 that are defined by the current iteration of the loop. Unsupportable
9203 operands are those that are defined by a previous iteration of the loop (as
9204 is the case in reduction/induction computations).
9205 For basic blocks, supportable operands are constants and bb invariants.
9206 For now, operands defined outside the basic block are not supported. */
/* Return true iff OPERAND is a "simple" use for the vectorizer: a
   constant, an invariant, or an SSA name whose definition can be
   classified.  On success *DT holds the vect_def_type and, for SSA
   names, *DEF_STMT the defining statement.
   NOTE(review): this extract appears to be missing structural lines
   (braces, returns, the switch header for the def-type dump); all
   original tokens below are kept byte-identical.  */
9209 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9210 gimple
**def_stmt
, enum vect_def_type
*dt
)
/* Start pessimistic; refined once OPERAND is classified.  */
9213 *dt
= vect_unknown_def_type
;
9215 if (dump_enabled_p ())
9217 dump_printf_loc (MSG_NOTE
, vect_location
,
9218 "vect_is_simple_use: operand ");
9219 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9220 dump_printf (MSG_NOTE
, "\n");
/* Literal constants are always vectorizable.  */
9223 if (CONSTANT_CLASS_P (operand
))
9225 *dt
= vect_constant_def
;
/* Other gimple invariants are defined outside the vectorized region.  */
9229 if (is_gimple_min_invariant (operand
))
9231 *dt
= vect_external_def
;
/* Beyond constants/invariants only SSA names are supported.  */
9235 if (TREE_CODE (operand
) != SSA_NAME
)
9237 if (dump_enabled_p ())
9238 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
/* Default definitions (e.g. incoming function arguments) have no
   defining stmt inside the region, so treat them as external.  */
9243 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9245 *dt
= vect_external_def
;
/* Hand the defining statement back to the caller.  */
9249 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9250 if (dump_enabled_p ())
9252 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9253 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
/* Definitions outside the loop/bb being vectorized are external.  */
9256 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9257 *dt
= vect_external_def
;
/* Otherwise classify from the def type recorded on the stmt's vinfo.  */
9260 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9261 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
/* Dump the classification.  NOTE(review): the switch header over *DT
   for the cases below appears to be on a dropped line.  */
9264 if (dump_enabled_p ())
9266 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9269 case vect_uninitialized_def
:
9270 dump_printf (MSG_NOTE
, "uninitialized\n");
9272 case vect_constant_def
:
9273 dump_printf (MSG_NOTE
, "constant\n");
9275 case vect_external_def
:
9276 dump_printf (MSG_NOTE
, "external\n");
9278 case vect_internal_def
:
9279 dump_printf (MSG_NOTE
, "internal\n");
9281 case vect_induction_def
:
9282 dump_printf (MSG_NOTE
, "induction\n");
9284 case vect_reduction_def
:
9285 dump_printf (MSG_NOTE
, "reduction\n");
9287 case vect_double_reduction_def
:
9288 dump_printf (MSG_NOTE
, "double reduction\n");
9290 case vect_nested_cycle
:
9291 dump_printf (MSG_NOTE
, "nested cycle\n");
9293 case vect_unknown_def_type
:
9294 dump_printf (MSG_NOTE
, "unknown\n");
/* A def we could not classify means the stmt is not vectorizable.  */
9299 if (*dt
== vect_unknown_def_type
)
9301 if (dump_enabled_p ())
9302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9303 "Unsupported pattern.\n");
/* Finally vet the kind of the defining statement; unsupported gimple
   codes are rejected with the dump below.  */
9307 switch (gimple_code (*def_stmt
))
9314 if (dump_enabled_p ())
9315 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9316 "unsupported defining stmt:\n");
9323 /* Function vect_is_simple_use.
9325 Same as vect_is_simple_use but also determines the vector operand
9326 type of OPERAND and stores it to *VECTYPE. If the definition of
9327 OPERAND is vect_uninitialized_def, vect_constant_def or
9328 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9329 is responsible to compute the best suited vector type for the
scalar operand.  */
/* Overload of vect_is_simple_use that additionally determines the
   vector type of OPERAND's definition and stores it in *VECTYPE.
   For in-region defs (internal/induction/reduction/double-reduction/
   nested-cycle) the vectype is taken from the defining stmt's vinfo;
   for uninitialized, constant and external defs *VECTYPE is set to
   NULL_TREE and the caller must choose a suitable vector type.  */
9333 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9334 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
/* Delegate the classification to the simpler overload first.  */
9336 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
9339 /* Now get a vector type if the def is internal, otherwise supply
9340 NULL_TREE and leave it up to the caller to figure out a proper
9341 type for the use stmt. */
9342 if (*dt
== vect_internal_def
9343 || *dt
== vect_induction_def
9344 || *dt
== vect_reduction_def
9345 || *dt
== vect_double_reduction_def
9346 || *dt
== vect_nested_cycle
)
9348 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
/* An irrelevant, non-live pattern stmt carries no vectype of its
   own; use the related (original) stmt's vinfo instead.  */
9350 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9351 && !STMT_VINFO_RELEVANT (stmt_info
)
9352 && !STMT_VINFO_LIVE_P (stmt_info
))
9353 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
/* In-region defs must already have a vectype computed.  */
9355 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9356 gcc_assert (*vectype
!= NULL_TREE
);
9358 else if (*dt
== vect_uninitialized_def
9359 || *dt
== vect_constant_def
9360 || *dt
== vect_external_def
)
9361 *vectype
= NULL_TREE
;
9369 /* Function supportable_widening_operation
9371 Check whether an operation represented by the code CODE is a
9372 widening operation that is supported by the target platform in
9373 vector form (i.e., when operating on arguments of type VECTYPE_IN
9374 producing a result of type VECTYPE_OUT).
9376 Widening operations we currently support are NOP (CONVERT), FLOAT
9377 and WIDEN_MULT. This function checks if these operations are supported
9378 by the target platform either directly (via vector tree-codes), or via
9382 - CODE1 and CODE2 are codes of vector operations to be used when
9383 vectorizing the operation, if available.
9384 - MULTI_STEP_CVT determines the number of required intermediate steps in
9385 case of multi-step conversion (like char->short->int - in that case
9386 MULTI_STEP_CVT will be 1).
9387 - INTERM_TYPES contains the intermediate type required to perform the
9388 widening operation (short in the above example). */
/* Check whether the target supports widening operation CODE from
   VECTYPE_IN to VECTYPE_OUT, either directly (one vector tree-code
   pair returned in *CODE1/*CODE2) or via up to MAX_INTERM_CVT_STEPS
   intermediate types (pushed on *INTERM_TYPES, count in
   *MULTI_STEP_CVT).
   NOTE(review): this extract appears to be missing structural lines
   (braces, the switch header on CODE, some case/default labels and
   return statements); all original tokens below are byte-identical.  */
9391 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9392 tree vectype_out
, tree vectype_in
,
9393 enum tree_code
*code1
, enum tree_code
*code2
,
9394 int *multi_step_cvt
,
9395 vec
<tree
> *interm_types
)
9397 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9398 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9399 struct loop
*vect_loop
= NULL
;
9400 machine_mode vec_mode
;
9401 enum insn_code icode1
, icode2
;
9402 optab optab1
, optab2
;
/* NARROW input type, WIDE output type.  */
9403 tree vectype
= vectype_in
;
9404 tree wide_vectype
= vectype_out
;
9405 enum tree_code c1
, c2
;
9407 tree prev_type
, intermediate_type
;
9408 machine_mode intermediate_mode
, prev_mode
;
9409 optab optab3
, optab4
;
9411 *multi_step_cvt
= 0;
9413 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* NOTE(review): the switch (code) header presumably sits on a dropped
   line just above this first case label.  */
9417 case WIDEN_MULT_EXPR
:
9418 /* The result of a vectorized widening operation usually requires
9419 two vectors (because the widened results do not fit into one vector).
9420 The generated vector results would normally be expected to be
9421 generated in the same order as in the original scalar computation,
9422 i.e. if 8 results are generated in each vector iteration, they are
9423 to be organized as follows:
9424 vect1: [res1,res2,res3,res4],
9425 vect2: [res5,res6,res7,res8].
9427 However, in the special case that the result of the widening
9428 operation is used in a reduction computation only, the order doesn't
9429 matter (because when vectorizing a reduction we change the order of
9430 the computation). Some targets can take advantage of this and
9431 generate more efficient code. For example, targets like Altivec,
9432 that support widen_mult using a sequence of {mult_even,mult_odd}
9433 generate the following vectors:
9434 vect1: [res1,res3,res5,res7],
9435 vect2: [res2,res4,res6,res8].
9437 When vectorizing outer-loops, we execute the inner-loop sequentially
9438 (each vectorized inner-loop iteration contributes to VF outer-loop
9439 iterations in parallel). We therefore don't allow to change the
9440 order of the computation in the inner-loop during outer-loop
9442 /* TODO: Another case in which order doesn't *really* matter is when we
9443 widen and then contract again, e.g. (short)((int)x * y >> 8).
9444 Normally, pack_trunc performs an even/odd permute, whereas the
9445 repack from an even/odd expansion would be an interleave, which
9446 would be significantly simpler for e.g. AVX2. */
9447 /* In any case, in order to avoid duplicating the code below, recurse
9448 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9449 are properly set up for the caller. If we fail, we'll continue with
9450 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
/* NOTE(review): the leading condition of this guard (likely a check
   that we are inside a loop vinfo) appears to be on a dropped line.  */
9452 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9453 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9454 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9455 stmt
, vectype_out
, vectype_in
,
9456 code1
, code2
, multi_step_cvt
,
9459 /* Elements in a vector with vect_used_by_reduction property cannot
9460 be reordered if the use chain with this property does not have the
9461 same operation. One such an example is s += a * b, where elements
9462 in a and b cannot be reordered. Here we check if the vector defined
9463 by STMT is only directly used in the reduction statement. */
9464 tree lhs
= gimple_assign_lhs (stmt
);
9465 use_operand_p dummy
;
9467 stmt_vec_info use_stmt_info
= NULL
;
9468 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9469 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9470 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
/* Default for WIDEN_MULT: the order-preserving LO/HI pair.  */
9473 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9474 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9487 case VEC_WIDEN_MULT_EVEN_EXPR
:
9488 /* Support the recursion induced just above. */
9489 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9490 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9493 case WIDEN_LSHIFT_EXPR
:
9494 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9495 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
/* NOTE(review): the case label for the conversion (CASE_CONVERT)
   branch appears to be on a dropped line above.  */
9499 c1
= VEC_UNPACK_LO_EXPR
;
9500 c2
= VEC_UNPACK_HI_EXPR
;
/* NOTE(review): likewise the FLOAT_EXPR case label seems dropped.  */
9504 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9505 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9508 case FIX_TRUNC_EXPR
:
9509 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9510 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9511 computing the operation. */
/* NOTE(review): the body of this big-endian check (presumably a swap
   of c1/c2) appears to be on a dropped line.  */
9518 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9521 if (code
== FIX_TRUNC_EXPR
)
9523 /* The signedness is determined from output operand. */
9524 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9525 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
/* Otherwise the optabs are keyed on the (narrow) input type.  */
9529 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9530 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9533 if (!optab1
|| !optab2
)
/* Single-step check: both optabs need handlers for the input mode.  */
9536 vec_mode
= TYPE_MODE (vectype
);
9537 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9538 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Direct support: both insns produce the wide vectype's mode.  */
9544 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9545 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9546 /* For scalar masks we may have different boolean
9547 vector types having the same QImode. Thus we
9548 add additional check for elements number. */
9549 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9550 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9551 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9553 /* Check if it's a multi-step conversion that can be done using intermediate
9556 prev_type
= vectype
;
9557 prev_mode
= vec_mode
;
/* Multi-step is only attempted for plain conversions.  */
9559 if (!CONVERT_EXPR_CODE_P (code
))
9562 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9563 intermediate steps in promotion sequence. We try
9564 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9566 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9567 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9569 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
/* Boolean vectors halve their element count each widening step.  */
9570 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9573 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9574 current_vector_size
)
;
9575 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9580 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9581 TYPE_UNSIGNED (prev_type
));
9583 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9584 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Each step needs handlers from the previous mode into the
   intermediate mode, and from the intermediate mode onwards.  */
9586 if (!optab3
|| !optab4
9587 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9588 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9589 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9590 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9591 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9592 == CODE_FOR_nothing
)
9593 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9594 == CODE_FOR_nothing
))
/* Record this step and see whether we have reached the wide type.  */
9597 interm_types
->quick_push (intermediate_type
);
9598 (*multi_step_cvt
)++;
9600 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9601 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9602 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9603 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9604 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9606 prev_type
= intermediate_type
;
9607 prev_mode
= intermediate_mode
;
/* No chain of at most MAX_INTERM_CVT_STEPS steps worked; discard any
   intermediates collected so far.  */
9610 interm_types
->release ();
9615 /* Function supportable_narrowing_operation
9617 Check whether an operation represented by the code CODE is a
9618 narrowing operation that is supported by the target platform in
9619 vector form (i.e., when operating on arguments of type VECTYPE_IN
9620 and producing a result of type VECTYPE_OUT).
9622 Narrowing operations we currently support are NOP (CONVERT) and
9623 FIX_TRUNC. This function checks if these operations are supported by
9624 the target platform directly via vector tree-codes.
9627 - CODE1 is the code of a vector operation to be used when
9628 vectorizing the operation, if available.
9629 - MULTI_STEP_CVT determines the number of required intermediate steps in
9630 case of multi-step conversion (like int->short->char - in that case
9631 MULTI_STEP_CVT will be 1).
9632 - INTERM_TYPES contains the intermediate type required to perform the
9633 narrowing operation (short in the above example). */
/* Check whether the target supports narrowing operation CODE from
   VECTYPE_IN to VECTYPE_OUT, either directly (vector tree-code in
   *CODE1) or via up to MAX_INTERM_CVT_STEPS intermediate types
   (pushed on *INTERM_TYPES, count in *MULTI_STEP_CVT).
   NOTE(review): this extract is missing structural lines (braces,
   the switch header on CODE, some labels and returns) and is
   truncated before the function's end; original tokens below are
   kept byte-identical.  */
9636 supportable_narrowing_operation (enum tree_code code
,
9637 tree vectype_out
, tree vectype_in
,
9638 enum tree_code
*code1
, int *multi_step_cvt
,
9639 vec
<tree
> *interm_types
)
9641 machine_mode vec_mode
;
9642 enum insn_code icode1
;
9643 optab optab1
, interm_optab
;
/* WIDE input type, NARROW output type.  */
9644 tree vectype
= vectype_in
;
9645 tree narrow_vectype
= vectype_out
;
9647 tree intermediate_type
, prev_type
;
9648 machine_mode intermediate_mode
, prev_mode
;
9652 *multi_step_cvt
= 0;
/* NOTE(review): the switch (code) header and the conversion case
   label presumably sit on dropped lines just above.  */
9656 c1
= VEC_PACK_TRUNC_EXPR
;
9659 case FIX_TRUNC_EXPR
:
9660 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9664 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9665 tree code and optabs used for computing the operation. */
9672 if (code
== FIX_TRUNC_EXPR
)
9673 /* The signedness is determined from output operand. */
9674 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
/* Otherwise key the optab on the (wide) input type.  */
9676 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
/* Single-step check: a handler for the input mode.  */
9681 vec_mode
= TYPE_MODE (vectype
);
9682 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
/* Direct support: the insn produces the narrow vectype's mode.  */
9687 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9688 /* For scalar masks we may have different boolean
9689 vector types having the same QImode. Thus we
9690 add additional check for elements number. */
9691 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9692 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9693 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9695 /* Check if it's a multi-step conversion that can be done using intermediate
9697 prev_mode
= vec_mode
;
9698 prev_type
= vectype
;
/* Signedness of the intermediates follows the FP->int output for
   FIX_TRUNC, otherwise the input type.  */
9699 if (code
== FIX_TRUNC_EXPR
)
9700 uns
= TYPE_UNSIGNED (vectype_out
);
9702 uns
= TYPE_UNSIGNED (vectype
);
9704 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9705 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9706 costly than signed. */
9707 if (code
== FIX_TRUNC_EXPR
&& uns
)
9709 enum insn_code icode2
;
/* Try a signed integer type of the output mode as the first step.  */
9712 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9714 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9715 if (interm_optab
!= unknown_optab
9716 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9717 && insn_data
[icode1
].operand
[0].mode
9718 == insn_data
[icode2
].operand
[0].mode
)
9721 optab1
= interm_optab
;
9726 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9727 intermediate steps in promotion sequence. We try
9728 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9729 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9730 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9732 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
/* Boolean vectors double their element count each narrowing step.  */
9733 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9736 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9737 current_vector_size
)
;
9738 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9743 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9745 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
/* Each step needs a handler from the previous mode into the
   intermediate mode, and one onwards from the intermediate mode.  */
9748 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9749 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9750 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9751 == CODE_FOR_nothing
))
/* Record this step and see whether we have reached the narrow type.  */
9754 interm_types
->quick_push (intermediate_type
);
9755 (*multi_step_cvt
)++;
9757 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9758 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9759 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9760 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9762 prev_mode
= intermediate_mode
;
9763 prev_type
= intermediate_type
;
9764 optab1
= interm_optab
;
/* No chain worked; discard any intermediates collected so far.  */
9767 interm_types
->release ();