/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
/* Return the vectorized type for the given statement.  */

static tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}
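
/* As a usage sketch (hypothetical values, not taken from any particular
   caller or target): costing NCOPIES vector statements in the loop body
   would typically be

     unsigned cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				       stmt_info, 0, vect_body);

   With a non-NULL BODY_COST_VEC the entry is only queued for later
   processing and the returned value is the preliminary estimate
   builtin_vectorization_cost * count; with a NULL vector the cost is
   passed straight to the target via add_stmt_cost.  */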
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
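
/* For example, in a loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the store to a[i] is relevant because it has a vdef (it alters memory),
   while a statement computing only the address of a[i] is not relevant by
   itself; it only becomes part of the vectorized code through the uses
   propagated later by process_use.  (Illustrative example.)  */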
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- array_ref = var
     -2- var = array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");
	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");
	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
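
/* A worked example (illustrative numbers only): for NCOPIES = 2 and one
   external operand, the function records one scalar_to_vec entry in the
   prologue and two vector_stmt entries in the body, so with a target that
   returns 1 for both kinds the dump would show inside_cost = 2 and
   prologue_cost = 1.  */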
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
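
/* Illustration of the PWR parameter (hypothetical counts): for a two-step
   promotion (PWR = 1) the loop above runs for i = 0 and i = 1, costing
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote operations,
   since each additional step doubles the number of instructions
   required.  */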
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       enum vect_def_type dt, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    /* N scalar stores plus extracting the elements.  */
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     scalar_store, stmt_info, 0, vect_body);
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
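
/* For instance (numbers for illustration only): a contiguous-permute store
   group with GROUP_SIZE = 4 and NCOPIES = 1 costs
   nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm operations on top of the
   stores themselves, which are costed by vect_get_store_cost below.  */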
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
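
/* The same permute formula applies to loads: e.g. a contiguous-permute
   load group with GROUP_SIZE = 8 and NCOPIES = 1 adds
   nstmts = 1 * ceil_log2 (8) * 8 = 24 vec_perm operations before the cost
   of the loads computed by vect_get_load_cost below.  (Illustrative
   numbers only.)  */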
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
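
/* A typical calling pattern (sketch only): the first copy is created with
   vect_get_vec_def_for_operand and each further copy chains off the
   previous vector def,

     vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
     for (j = 1; j < ncopies; j++)
       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);

   which walks the STMT_VINFO_RELATED_STMT chain exactly as in the VS1.x
   example above.  */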
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);
/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  auto_vec_perm_indices sel (nunits);
  for (i = 0; i < nunits; ++i)
    sel.quick_push (nunits - 1 - i);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
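
/* For example, for a V4SI vector the selector built above is
   { 3, 2, 1, 0 }, i.e. element i of the result is element nunits - 1 - i
   of the input, which reverses the vector.  */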
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
			   vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
			   && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
	{
	  /* Try to use consecutive accesses of GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (nunits % group_size == 0)
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
	  if (overrun_p
	      && gap < (vect_known_alignment_in_bytes (first_dr)
			/ vect_get_scalar_dr_size (first_dr)))
	    overrun_p = false;
	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  *memory_access_type = VMAT_CONTIGUOUS;
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
      if (would_overrun_p
	  && gap < (vect_known_alignment_in_bytes (first_dr)
		    / vect_get_scalar_dr_size (first_dr)))
	would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (stmt) > 0)
	{
	  /* First try using LOAD/STORE_LANES.  */
	  if (vls_type == VLS_LOAD
	      ? vect_load_lanes_supported (vectype, group_size)
	      : vect_store_lanes_supported (vectype, group_size))
	    {
	      *memory_access_type = VMAT_LOAD_STORE_LANES;
	      overrun_p = would_overrun_p;
	    }

	  /* If that fails, try using permuting loads.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_grouped_load_supported (vectype, single_element_p,
						 group_size)
		  : vect_grouped_store_supported (vectype, group_size)))
	    {
	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
	      overrun_p = would_overrun_p;
	    }
	}
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
	{
	  gcc_assert (gimple_assign_single_p (next_stmt));
	  tree op = gimple_assign_rhs1 (next_stmt);
	  gimple *def_stmt;
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
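
/* A small example of the overrun reasoning above (illustrative numbers):
   with a group of 3 loads, GROUP_GAP = 1 and a vector of 4 elements, each
   vector access reads one element past the group.  If the known alignment
   of the first data-ref is at least 4 elements, that extra element is
   guaranteed to lie in the same aligned block and the overrun is harmless;
   otherwise the loop needs LOOP_VINFO_PEELING_FOR_GAPS.  */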
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
1938 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1939 if there is a memory access type that the vectorized form can use,
1940 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1941 or scatters, fill in GS_INFO accordingly.
1943 SLP says whether we're performing SLP rather than loop vectorization.
1944 VECTYPE is the vector type that the vectorized statements will use.
1945 NCOPIES is the number of vector statements that will be needed. */
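/* For illustration: a unit-stride access such as a[i] is normally
   classified as VMAT_CONTIGUOUS, an access with a runtime stride such as
   a[i * s] as VMAT_ELEMENTWISE, and an indexed access such as a[idx[i]]
   as VMAT_GATHER_SCATTER when the target provides gather/scatter support.
   The accesses named here are only examples of typical inputs.  */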
1948 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1949 vec_load_store_type vls_type
, unsigned int ncopies
,
1950 vect_memory_access_type
*memory_access_type
,
1951 gather_scatter_info
*gs_info
)
1953 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1954 vec_info
*vinfo
= stmt_info
->vinfo
;
1955 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1956 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1958 *memory_access_type
= VMAT_GATHER_SCATTER
;
1960 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1962 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1963 &gs_info
->offset_dt
,
1964 &gs_info
->offset_vectype
))
1966 if (dump_enabled_p ())
1967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1968 "%s index use not simple.\n",
1969 vls_type
== VLS_LOAD
? "gather" : "scatter");
1973 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1975 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1976 memory_access_type
))
1979 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1982 *memory_access_type
= VMAT_ELEMENTWISE
;
1986 int cmp
= compare_step_with_zero (stmt
);
1988 *memory_access_type
= get_negative_load_store_type
1989 (stmt
, vectype
, vls_type
, ncopies
);
1992 gcc_assert (vls_type
== VLS_LOAD
);
1993 *memory_access_type
= VMAT_INVARIANT
;
1996 *memory_access_type
= VMAT_CONTIGUOUS
;
1999 /* FIXME: At the moment the cost model seems to underestimate the
2000 cost of using elementwise accesses. This check preserves the
2001 traditional behavior until that can be fixed. */
2002 if (*memory_access_type
== VMAT_ELEMENTWISE
2003 && !STMT_VINFO_STRIDED_P (stmt_info
))
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2007 "not falling back to elementwise accesses\n");
2013 /* Function vectorizable_mask_load_store.
2015 Check if STMT performs a conditional load or store that can be vectorized.
2016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2018 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
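/* Rough illustration: a scalar loop such as

     for (i = 0; i < n; i++)
       if (c[i])
	 x[i] = y[i];

   is if-converted into a call to IFN_MASK_STORE, which this function then
   replaces with masked vector stores when the target advertises support
   through can_vec_mask_load_store_p.  */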
2021 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2022 gimple
**vec_stmt
, slp_tree slp_node
)
2024 tree vec_dest
= NULL
;
2025 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2026 stmt_vec_info prev_stmt_info
;
2027 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2028 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2029 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2030 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2031 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2032 tree rhs_vectype
= NULL_TREE
;
2037 tree dataref_ptr
= NULL_TREE
;
2039 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2043 gather_scatter_info gs_info
;
2044 vec_load_store_type vls_type
;
2047 enum vect_def_type dt
;
2049 if (slp_node
!= NULL
)
2052 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2053 gcc_assert (ncopies
>= 1);
2055 mask
= gimple_call_arg (stmt
, 2);
2057 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2060 /* FORNOW. This restriction should be relaxed. */
2061 if (nested_in_vect_loop
&& ncopies
> 1)
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2065 "multiple types in nested loop.");
2069 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2072 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2076 if (!STMT_VINFO_DATA_REF (stmt_info
))
2079 elem_type
= TREE_TYPE (vectype
);
2081 if (TREE_CODE (mask
) != SSA_NAME
)
2084 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2088 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2090 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2091 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2094 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2096 tree rhs
= gimple_call_arg (stmt
, 3);
2097 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2099 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2100 vls_type
= VLS_STORE_INVARIANT
;
2102 vls_type
= VLS_STORE
;
2105 vls_type
= VLS_LOAD
;
2107 vect_memory_access_type memory_access_type
;
2108 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2109 &memory_access_type
, &gs_info
))
2112 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2114 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2116 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2117 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2121 "masked gather with integer mask not supported.");
2125 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2129 "unsupported access type for masked %s.\n",
2130 vls_type
== VLS_LOAD
? "load" : "store");
2133 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2134 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2135 TYPE_MODE (mask_vectype
),
2136 vls_type
== VLS_LOAD
)
2138 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2141 if (!vec_stmt
) /* transformation not required. */
2143 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2144 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2145 if (vls_type
== VLS_LOAD
)
2146 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2149 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2150 dt
, NULL
, NULL
, NULL
);
2153 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2157 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2159 tree vec_oprnd0
= NULL_TREE
, op
;
2160 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2161 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2162 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2163 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2164 tree mask_perm_mask
= NULL_TREE
;
2165 edge pe
= loop_preheader_edge (loop
);
2168 enum { NARROW
, NONE
, WIDEN
} modifier
;
2169 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2171 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2172 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2173 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2174 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2175 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2176 scaletype
= TREE_VALUE (arglist
);
2177 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2178 && types_compatible_p (srctype
, masktype
));
2180 if (nunits
== gather_off_nunits
)
2182 else if (nunits
== gather_off_nunits
/ 2)
2186 auto_vec_perm_indices
sel (gather_off_nunits
);
2187 for (i
= 0; i
< gather_off_nunits
; ++i
)
2188 sel
.quick_push (i
| nunits
);
2190 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2192 else if (nunits
== gather_off_nunits
* 2)
2196 auto_vec_perm_indices
sel (nunits
);
2197 sel
.quick_grow (nunits
);
2198 for (i
= 0; i
< nunits
; ++i
)
2199 sel
[i
] = i
< gather_off_nunits
2200 ? i
: i
+ nunits
- gather_off_nunits
;
2202 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2204 for (i
= 0; i
< nunits
; ++i
)
2205 sel
[i
] = i
| gather_off_nunits
;
2206 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2211 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2213 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2214 if (!is_gimple_min_invariant (ptr
))
2216 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2217 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2218 gcc_assert (!new_bb
);
2221 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2223 prev_stmt_info
= NULL
;
2224 for (j
= 0; j
< ncopies
; ++j
)
2226 if (modifier
== WIDEN
&& (j
& 1))
2227 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2228 perm_mask
, stmt
, gsi
);
2231 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2234 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2236 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2239 == TYPE_VECTOR_SUBPARTS (idxtype
));
2240 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2241 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2243 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2244 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2248 if (mask_perm_mask
&& (j
& 1))
2249 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2250 mask_perm_mask
, stmt
, gsi
);
2254 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2257 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2258 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2262 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2264 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2265 == TYPE_VECTOR_SUBPARTS (masktype
));
2266 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2267 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2269 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2270 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2276 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2279 if (!useless_type_conversion_p (vectype
, rettype
))
2281 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2282 == TYPE_VECTOR_SUBPARTS (rettype
));
2283 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2284 gimple_call_set_lhs (new_stmt
, op
);
2285 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2286 var
= make_ssa_name (vec_dest
);
2287 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2288 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2292 var
= make_ssa_name (vec_dest
, new_stmt
);
2293 gimple_call_set_lhs (new_stmt
, var
);
2296 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2298 if (modifier
== NARROW
)
2305 var
= permute_vec_elements (prev_res
, var
,
2306 perm_mask
, stmt
, gsi
);
2307 new_stmt
= SSA_NAME_DEF_STMT (var
);
2310 if (prev_stmt_info
== NULL
)
2311 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2313 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2314 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
   from the IL if it is dead.  */
2319 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2321 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2322 stmt_info
= vinfo_for_stmt (stmt
);
2324 tree lhs
= gimple_call_lhs (stmt
);
2325 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2326 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2327 set_vinfo_for_stmt (stmt
, NULL
);
2328 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2329 gsi_replace (gsi
, new_stmt
, true);
2332 else if (vls_type
!= VLS_LOAD
)
2334 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2335 prev_stmt_info
= NULL
;
2336 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2337 for (i
= 0; i
< ncopies
; i
++)
2339 unsigned align
, misalign
;
2343 tree rhs
= gimple_call_arg (stmt
, 3);
2344 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2345 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
/* We should have caught mismatched types earlier.  */
2348 gcc_assert (useless_type_conversion_p (vectype
,
2349 TREE_TYPE (vec_rhs
)));
2350 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2351 NULL_TREE
, &dummy
, gsi
,
2352 &ptr_incr
, false, &inv_p
);
2353 gcc_assert (!inv_p
);
2357 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2358 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2359 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2360 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2361 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2362 TYPE_SIZE_UNIT (vectype
));
2365 align
= DR_TARGET_ALIGNMENT (dr
);
2366 if (aligned_access_p (dr
))
2368 else if (DR_MISALIGNMENT (dr
) == -1)
2370 align
= TYPE_ALIGN_UNIT (elem_type
);
2374 misalign
= DR_MISALIGNMENT (dr
);
2375 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2377 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2378 misalign
? least_bit_hwi (misalign
) : align
);
2380 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2381 ptr
, vec_mask
, vec_rhs
);
2382 gimple_call_set_nothrow (call
, true);
2384 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2386 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2388 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2389 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2394 tree vec_mask
= NULL_TREE
;
2395 prev_stmt_info
= NULL
;
2396 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2397 for (i
= 0; i
< ncopies
; i
++)
2399 unsigned align
, misalign
;
2403 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2405 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2406 NULL_TREE
, &dummy
, gsi
,
2407 &ptr_incr
, false, &inv_p
);
2408 gcc_assert (!inv_p
);
2412 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2413 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2414 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2415 TYPE_SIZE_UNIT (vectype
));
2418 align
= DR_TARGET_ALIGNMENT (dr
);
2419 if (aligned_access_p (dr
))
2421 else if (DR_MISALIGNMENT (dr
) == -1)
2423 align
= TYPE_ALIGN_UNIT (elem_type
);
2427 misalign
= DR_MISALIGNMENT (dr
);
2428 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2430 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2431 misalign
? least_bit_hwi (misalign
) : align
);
2433 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2435 gimple_call_set_lhs (call
, make_ssa_name (vec_dest
));
2436 gimple_call_set_nothrow (call
, true);
2437 vect_finish_stmt_generation (stmt
, call
, gsi
);
2439 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= call
;
2441 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = call
;
2442 prev_stmt_info
= vinfo_for_stmt (call
);
2446 if (vls_type
== VLS_LOAD
)
/* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
   from the IL if it is dead.  */
2450 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2452 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2453 stmt_info
= vinfo_for_stmt (stmt
);
2455 tree lhs
= gimple_call_lhs (stmt
);
2456 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2457 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2458 set_vinfo_for_stmt (stmt
, NULL
);
2459 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2460 gsi_replace (gsi
, new_stmt
, true);
2466 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
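/* Illustrative sketch: __builtin_bswap32 over a V4SI vector is handled
   below by viewing the operand as V16QI, applying a VEC_PERM_EXPR with
   the byte selector { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 } and
   viewing the result back as V4SI.  */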
2469 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2470 gimple
**vec_stmt
, slp_tree slp_node
,
2471 tree vectype_in
, enum vect_def_type
*dt
)
2474 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2475 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2476 unsigned ncopies
, nunits
;
2478 op
= gimple_call_arg (stmt
, 0);
2479 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2480 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2482 /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
case of SLP.  */
2488 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2490 gcc_assert (ncopies
>= 1);
2492 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2496 unsigned int num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2497 unsigned word_bytes
= num_bytes
/ nunits
;
2499 auto_vec_perm_indices
elts (num_bytes
);
2500 for (unsigned i
= 0; i
< nunits
; ++i
)
2501 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2502 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2504 if (! can_vec_perm_p (TYPE_MODE (char_vectype
), false, &elts
))
2509 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2513 if (! PURE_SLP_STMT (stmt_info
))
2515 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2516 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2517 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2518 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2523 auto_vec
<tree
, 32> telts (num_bytes
);
2524 for (unsigned i
= 0; i
< num_bytes
; ++i
)
2525 telts
.quick_push (build_int_cst (char_type_node
, elts
[i
]));
2526 tree bswap_vconst
= build_vector (char_vectype
, telts
);
2529 vec
<tree
> vec_oprnds
= vNULL
;
2530 gimple
*new_stmt
= NULL
;
2531 stmt_vec_info prev_stmt_info
= NULL
;
2532 for (unsigned j
= 0; j
< ncopies
; j
++)
2536 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
2538 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
/* Arguments are ready.  Create the new vector stmt.  */
2543 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2545 tree tem
= make_ssa_name (char_vectype
);
2546 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2547 char_vectype
, vop
));
2548 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2549 tree tem2
= make_ssa_name (char_vectype
);
2550 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2551 tem
, tem
, bswap_vconst
);
2552 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2553 tem
= make_ssa_name (vectype
);
2554 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2556 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2558 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2565 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2567 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2569 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2572 vec_oprnds
.release ();
2576 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2577 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
in a single step.  On success, store the binary pack code in
*CONVERT_CODE.  */
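/* For example, packing two V4SI vectors into one V8HI vector is a single
   narrowing step (typically VEC_PACK_TRUNC_EXPR); conversions that would
   need more than one step make this helper return false.  */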
2582 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2583 tree_code
*convert_code
)
2585 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2586 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2590 int multi_step_cvt
= 0;
2591 auto_vec
<tree
, 8> interm_types
;
2592 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2593 &code
, &multi_step_cvt
,
2598 *convert_code
= code
;
2602 /* Function vectorizable_call.
2604 Check if GS performs a function call that can be vectorized.
2605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2606 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2607 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
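/* Illustrative example: a call such as x[i] = sqrtf (y[i]) can be
   vectorized either through an internal function (e.g. IFN_SQRT, tried
   first below) or through a target builtin returned by
   targetm.vectorize.builtin_vectorized_function.  */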
2610 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2617 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2618 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2619 tree vectype_out
, vectype_in
;
2622 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2623 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2624 vec_info
*vinfo
= stmt_info
->vinfo
;
2625 tree fndecl
, new_temp
, rhs_type
;
2627 enum vect_def_type dt
[3]
2628 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2630 gimple
*new_stmt
= NULL
;
2632 vec
<tree
> vargs
= vNULL
;
2633 enum { NARROW
, NONE
, WIDEN
} modifier
;
2637 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2640 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2644 /* Is GS a vectorizable call? */
2645 stmt
= dyn_cast
<gcall
*> (gs
);
2649 if (gimple_call_internal_p (stmt
)
2650 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2651 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2652 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2655 if (gimple_call_lhs (stmt
) == NULL_TREE
2656 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2659 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2661 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2663 /* Process function arguments. */
2664 rhs_type
= NULL_TREE
;
2665 vectype_in
= NULL_TREE
;
2666 nargs
= gimple_call_num_args (stmt
);
2668 /* Bail out if the function has more than three arguments, we do not have
2669 interesting builtin functions to vectorize with more than two arguments
2670 except for fma. No arguments is also not good. */
2671 if (nargs
== 0 || nargs
> 3)
2674 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2675 if (gimple_call_internal_p (stmt
)
2676 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2679 rhs_type
= unsigned_type_node
;
2682 for (i
= 0; i
< nargs
; i
++)
2686 op
= gimple_call_arg (stmt
, i
);
2688 /* We can only handle calls with arguments of the same type. */
2690 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2692 if (dump_enabled_p ())
2693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2694 "argument types differ.\n");
2698 rhs_type
= TREE_TYPE (op
);
2700 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2702 if (dump_enabled_p ())
2703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2704 "use not simple.\n");
2709 vectype_in
= opvectype
;
2711 && opvectype
!= vectype_in
)
2713 if (dump_enabled_p ())
2714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2715 "argument vector types differ.\n");
2719 /* If all arguments are external or constant defs use a vector type with
2720 the same size as the output vector type. */
2722 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2724 gcc_assert (vectype_in
);
2727 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2730 "no vectype for scalar type ");
2731 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2732 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2739 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2740 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2741 if (nunits_in
== nunits_out
/ 2)
2743 else if (nunits_out
== nunits_in
)
2745 else if (nunits_out
== nunits_in
/ 2)
2750 /* We only handle functions that do not read or clobber memory. */
2751 if (gimple_vuse (stmt
))
2753 if (dump_enabled_p ())
2754 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2755 "function reads from or writes to memory.\n");
2759 /* For now, we only vectorize functions if a target specific builtin
2760 is available. TODO -- in some cases, it might be profitable to
2761 insert the calls for pieces of the vector, in order to be able
2762 to vectorize other operations in the loop. */
2764 internal_fn ifn
= IFN_LAST
;
2765 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2766 tree callee
= gimple_call_fndecl (stmt
);
2768 /* First try using an internal function. */
2769 tree_code convert_code
= ERROR_MARK
;
2771 && (modifier
== NONE
2772 || (modifier
== NARROW
2773 && simple_integer_narrowing (vectype_out
, vectype_in
,
2775 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2778 /* If that fails, try asking for a target-specific built-in function. */
2779 if (ifn
== IFN_LAST
)
2781 if (cfn
!= CFN_LAST
)
2782 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2783 (cfn
, vectype_out
, vectype_in
);
2785 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2786 (callee
, vectype_out
, vectype_in
);
2789 if (ifn
== IFN_LAST
&& !fndecl
)
2791 if (cfn
== CFN_GOMP_SIMD_LANE
2794 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2795 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2796 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2797 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2799 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2800 { 0, 1, 2, ... vf - 1 } vector. */
2801 gcc_assert (nargs
== 0);
2803 else if (modifier
== NONE
2804 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2805 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2806 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2807 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2813 "function is not vectorizable.\n");
2820 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2821 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
2823 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
2825 /* Sanity check: make sure that at least one copy of the vectorized stmt
2826 needs to be generated. */
2827 gcc_assert (ncopies
>= 1);
2829 if (!vec_stmt
) /* transformation not required. */
2831 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2832 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2835 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2836 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2837 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2838 vec_promote_demote
, stmt_info
, 0, vect_body
);
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2849 scalar_dest
= gimple_call_lhs (stmt
);
2850 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2852 prev_stmt_info
= NULL
;
2853 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2855 tree prev_res
= NULL_TREE
;
2856 for (j
= 0; j
< ncopies
; ++j
)
2858 /* Build argument list for the vectorized call. */
2860 vargs
.create (nargs
);
2866 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2867 vec
<tree
> vec_oprnds0
;
2869 for (i
= 0; i
< nargs
; i
++)
2870 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2871 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
2872 vec_oprnds0
= vec_defs
[0];
2874 /* Arguments are ready. Create the new vector stmt. */
2875 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2878 for (k
= 0; k
< nargs
; k
++)
2880 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2881 vargs
[k
] = vec_oprndsk
[i
];
2883 if (modifier
== NARROW
)
2885 tree half_res
= make_ssa_name (vectype_in
);
2887 = gimple_build_call_internal_vec (ifn
, vargs
);
2888 gimple_call_set_lhs (call
, half_res
);
2889 gimple_call_set_nothrow (call
, true);
2891 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2894 prev_res
= half_res
;
2897 new_temp
= make_ssa_name (vec_dest
);
2898 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2899 prev_res
, half_res
);
2904 if (ifn
!= IFN_LAST
)
2905 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2907 call
= gimple_build_call_vec (fndecl
, vargs
);
2908 new_temp
= make_ssa_name (vec_dest
, call
);
2909 gimple_call_set_lhs (call
, new_temp
);
2910 gimple_call_set_nothrow (call
, true);
2913 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2914 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2917 for (i
= 0; i
< nargs
; i
++)
2919 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2920 vec_oprndsi
.release ();
2925 for (i
= 0; i
< nargs
; i
++)
2927 op
= gimple_call_arg (stmt
, i
);
2930 = vect_get_vec_def_for_operand (op
, stmt
);
2933 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2935 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2938 vargs
.quick_push (vec_oprnd0
);
2941 if (gimple_call_internal_p (stmt
)
2942 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2944 auto_vec
<tree
, 32> v (nunits_out
);
2945 for (int k
= 0; k
< nunits_out
; ++k
)
2946 v
.quick_push (build_int_cst (unsigned_type_node
,
2947 j
* nunits_out
+ k
));
2948 tree cst
= build_vector (vectype_out
, v
);
2950 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2951 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2952 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2953 new_temp
= make_ssa_name (vec_dest
);
2954 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2956 else if (modifier
== NARROW
)
2958 tree half_res
= make_ssa_name (vectype_in
);
2959 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
2960 gimple_call_set_lhs (call
, half_res
);
2961 gimple_call_set_nothrow (call
, true);
2963 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2966 prev_res
= half_res
;
2969 new_temp
= make_ssa_name (vec_dest
);
2970 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2971 prev_res
, half_res
);
2976 if (ifn
!= IFN_LAST
)
2977 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2979 call
= gimple_build_call_vec (fndecl
, vargs
);
2980 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2981 gimple_call_set_lhs (call
, new_temp
);
2982 gimple_call_set_nothrow (call
, true);
2985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2987 if (j
== (modifier
== NARROW
? 1 : 0))
2988 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2990 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2992 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2995 else if (modifier
== NARROW
)
2997 for (j
= 0; j
< ncopies
; ++j
)
2999 /* Build argument list for the vectorized call. */
3001 vargs
.create (nargs
* 2);
3007 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3008 vec
<tree
> vec_oprnds0
;
3010 for (i
= 0; i
< nargs
; i
++)
3011 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3012 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3013 vec_oprnds0
= vec_defs
[0];
3015 /* Arguments are ready. Create the new vector stmt. */
3016 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3020 for (k
= 0; k
< nargs
; k
++)
3022 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3023 vargs
.quick_push (vec_oprndsk
[i
]);
3024 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3027 if (ifn
!= IFN_LAST
)
3028 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3030 call
= gimple_build_call_vec (fndecl
, vargs
);
3031 new_temp
= make_ssa_name (vec_dest
, call
);
3032 gimple_call_set_lhs (call
, new_temp
);
3033 gimple_call_set_nothrow (call
, true);
3035 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3036 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3039 for (i
= 0; i
< nargs
; i
++)
3041 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3042 vec_oprndsi
.release ();
3047 for (i
= 0; i
< nargs
; i
++)
3049 op
= gimple_call_arg (stmt
, i
);
3053 = vect_get_vec_def_for_operand (op
, stmt
);
3055 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3059 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3061 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3063 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3066 vargs
.quick_push (vec_oprnd0
);
3067 vargs
.quick_push (vec_oprnd1
);
3070 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3071 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3072 gimple_call_set_lhs (new_stmt
, new_temp
);
3073 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3076 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3078 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3080 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3083 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3086 /* No current target implements this case. */
3091 /* The call in STMT might prevent it from being removed in dce.
3092 We however cannot remove it here, due to the way the ssa name
3093 it defines is mapped to the new definition. So just replace
3094 rhs of the statement with something harmless. */
3099 type
= TREE_TYPE (scalar_dest
);
3100 if (is_pattern_stmt_p (stmt_info
))
3101 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3103 lhs
= gimple_call_lhs (stmt
);
3105 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3106 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3107 set_vinfo_for_stmt (stmt
, NULL
);
3108 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3109 gsi_replace (gsi
, new_stmt
, false);
3115 struct simd_call_arg_info
3119 HOST_WIDE_INT linear_step
;
3120 enum vect_def_type dt
;
3122 bool simd_lane_linear
;
3125 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
is linear within simd lane (but not within whole loop), note it in
ARGINFO.  */
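/* Hypothetical GIMPLE input that this helper recognizes (illustration
   only; the SSA names are made up):

     _1 = .GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 8;
     op_3 = &base + _2;

   The walk below follows the POINTER_PLUS_EXPR / MULT_EXPR chain down to
   the .GOMP_SIMD_LANE call and records a per-simd-lane linear step of 8
   in ARGINFO.  */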
3130 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3131 struct simd_call_arg_info
*arginfo
)
3133 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3135 if (!is_gimple_assign (def_stmt
)
3136 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3137 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3140 tree base
= gimple_assign_rhs1 (def_stmt
);
3141 HOST_WIDE_INT linear_step
= 0;
3142 tree v
= gimple_assign_rhs2 (def_stmt
);
3143 while (TREE_CODE (v
) == SSA_NAME
)
3146 def_stmt
= SSA_NAME_DEF_STMT (v
);
3147 if (is_gimple_assign (def_stmt
))
3148 switch (gimple_assign_rhs_code (def_stmt
))
3151 t
= gimple_assign_rhs2 (def_stmt
);
3152 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3154 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3155 v
= gimple_assign_rhs1 (def_stmt
);
3158 t
= gimple_assign_rhs2 (def_stmt
);
3159 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3161 linear_step
= tree_to_shwi (t
);
3162 v
= gimple_assign_rhs1 (def_stmt
);
3165 t
= gimple_assign_rhs1 (def_stmt
);
3166 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3167 || (TYPE_PRECISION (TREE_TYPE (v
))
3168 < TYPE_PRECISION (TREE_TYPE (t
))))
3177 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3179 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3180 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3185 arginfo
->linear_step
= linear_step
;
3187 arginfo
->simd_lane_linear
= true;
3193 /* Function vectorizable_simd_clone_call.
3195 Check if STMT performs a function call that can be vectorized
3196 by calling a simd clone of the function.
3197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
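/* Rough illustration: given

     #pragma omp declare simd uniform(s) linear(i)
     int foo (int i, int s);

   a call foo (i, s) in a vectorizable loop can be replaced by a call to
   one of foo's simd clones; the selection loop below scores each clone by
   how well its simdlen, argument kinds and alignment match the call.  */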
3202 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3203 gimple
**vec_stmt
, slp_tree slp_node
)
3208 tree vec_oprnd0
= NULL_TREE
;
3209 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3211 unsigned int nunits
;
3212 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3213 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3214 vec_info
*vinfo
= stmt_info
->vinfo
;
3215 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3216 tree fndecl
, new_temp
;
3218 gimple
*new_stmt
= NULL
;
3220 auto_vec
<simd_call_arg_info
> arginfo
;
3221 vec
<tree
> vargs
= vNULL
;
3223 tree lhs
, rtype
, ratype
;
3224 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
3226 /* Is STMT a vectorizable call? */
3227 if (!is_gimple_call (stmt
))
3230 fndecl
= gimple_call_fndecl (stmt
);
3231 if (fndecl
== NULL_TREE
)
3234 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3235 if (node
== NULL
|| node
->simd_clones
== NULL
)
3238 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3241 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3245 if (gimple_call_lhs (stmt
)
3246 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3249 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3251 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3253 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3260 /* Process function arguments. */
3261 nargs
= gimple_call_num_args (stmt
);
3263 /* Bail out if the function has zero arguments. */
3267 arginfo
.reserve (nargs
, true);
3269 for (i
= 0; i
< nargs
; i
++)
3271 simd_call_arg_info thisarginfo
;
3274 thisarginfo
.linear_step
= 0;
3275 thisarginfo
.align
= 0;
3276 thisarginfo
.op
= NULL_TREE
;
3277 thisarginfo
.simd_lane_linear
= false;
3279 op
= gimple_call_arg (stmt
, i
);
3280 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3281 &thisarginfo
.vectype
)
3282 || thisarginfo
.dt
== vect_uninitialized_def
)
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3286 "use not simple.\n");
3290 if (thisarginfo
.dt
== vect_constant_def
3291 || thisarginfo
.dt
== vect_external_def
)
3292 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3294 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3296 /* For linear arguments, the analyze phase should have saved
3297 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3298 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3299 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3301 gcc_assert (vec_stmt
);
3302 thisarginfo
.linear_step
3303 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3305 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3306 thisarginfo
.simd_lane_linear
3307 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3308 == boolean_true_node
);
3309 /* If loop has been peeled for alignment, we need to adjust it. */
3310 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3311 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3312 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3314 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3315 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3316 tree opt
= TREE_TYPE (thisarginfo
.op
);
3317 bias
= fold_convert (TREE_TYPE (step
), bias
);
3318 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3320 = fold_build2 (POINTER_TYPE_P (opt
)
3321 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3322 thisarginfo
.op
, bias
);
3326 && thisarginfo
.dt
!= vect_constant_def
3327 && thisarginfo
.dt
!= vect_external_def
3329 && TREE_CODE (op
) == SSA_NAME
3330 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3332 && tree_fits_shwi_p (iv
.step
))
3334 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3335 thisarginfo
.op
= iv
.base
;
3337 else if ((thisarginfo
.dt
== vect_constant_def
3338 || thisarginfo
.dt
== vect_external_def
)
3339 && POINTER_TYPE_P (TREE_TYPE (op
)))
3340 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
/* Addresses of array elements indexed by GOMP_SIMD_LANE are
   linear too.  */
3343 if (POINTER_TYPE_P (TREE_TYPE (op
))
3344 && !thisarginfo
.linear_step
3346 && thisarginfo
.dt
!= vect_constant_def
3347 && thisarginfo
.dt
!= vect_external_def
3350 && TREE_CODE (op
) == SSA_NAME
)
3351 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3353 arginfo
.quick_push (thisarginfo
);
3356 unsigned int badness
= 0;
3357 struct cgraph_node
*bestn
= NULL
;
3358 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3359 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3361 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3362 n
= n
->simdclone
->next_clone
)
3364 unsigned int this_badness
= 0;
3365 if (n
->simdclone
->simdlen
3366 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3367 || n
->simdclone
->nargs
!= nargs
)
3369 if (n
->simdclone
->simdlen
3370 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3371 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3372 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3373 if (n
->simdclone
->inbranch
)
3374 this_badness
+= 2048;
3375 int target_badness
= targetm
.simd_clone
.usable (n
);
3376 if (target_badness
< 0)
3378 this_badness
+= target_badness
* 512;
3379 /* FORNOW: Have to add code to add the mask argument. */
3380 if (n
->simdclone
->inbranch
)
3382 for (i
= 0; i
< nargs
; i
++)
3384 switch (n
->simdclone
->args
[i
].arg_type
)
3386 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3387 if (!useless_type_conversion_p
3388 (n
->simdclone
->args
[i
].orig_type
,
3389 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3391 else if (arginfo
[i
].dt
== vect_constant_def
3392 || arginfo
[i
].dt
== vect_external_def
3393 || arginfo
[i
].linear_step
)
3396 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3397 if (arginfo
[i
].dt
!= vect_constant_def
3398 && arginfo
[i
].dt
!= vect_external_def
)
3401 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3403 if (arginfo
[i
].dt
== vect_constant_def
3404 || arginfo
[i
].dt
== vect_external_def
3405 || (arginfo
[i
].linear_step
3406 != n
->simdclone
->args
[i
].linear_step
))
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3418 case SIMD_CLONE_ARG_TYPE_MASK
:
3421 if (i
== (size_t) -1)
3423 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3428 if (arginfo
[i
].align
)
3429 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3430 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3432 if (i
== (size_t) -1)
3434 if (bestn
== NULL
|| this_badness
< badness
)
3437 badness
= this_badness
;
3444 for (i
= 0; i
< nargs
; i
++)
3445 if ((arginfo
[i
].dt
== vect_constant_def
3446 || arginfo
[i
].dt
== vect_external_def
)
3447 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3450 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3452 if (arginfo
[i
].vectype
== NULL
3453 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3454 > bestn
->simdclone
->simdlen
))
3458 fndecl
= bestn
->decl
;
3459 nunits
= bestn
->simdclone
->simdlen
;
3460 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3462 /* If the function isn't const, only allow it in simd loops where user
3463 has asserted that at least nunits consecutive iterations can be
3464 performed using SIMD instructions. */
3465 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3466 && gimple_vuse (stmt
))
3469 /* Sanity check: make sure that at least one copy of the vectorized stmt
3470 needs to be generated. */
3471 gcc_assert (ncopies
>= 1);
3473 if (!vec_stmt
) /* transformation not required. */
3475 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3476 for (i
= 0; i
< nargs
; i
++)
3477 if ((bestn
->simdclone
->args
[i
].arg_type
3478 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3479 || (bestn
->simdclone
->args
[i
].arg_type
3480 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3482 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3484 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3485 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3486 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3487 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3488 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3489 tree sll
= arginfo
[i
].simd_lane_linear
3490 ? boolean_true_node
: boolean_false_node
;
3491 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3493 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3494 if (dump_enabled_p ())
3495 dump_printf_loc (MSG_NOTE
, vect_location
,
3496 "=== vectorizable_simd_clone_call ===\n");
3497 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3503 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3507 scalar_dest
= gimple_call_lhs (stmt
);
3508 vec_dest
= NULL_TREE
;
3513 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3514 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3515 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3518 rtype
= TREE_TYPE (ratype
);
3522 prev_stmt_info
= NULL
;
3523 for (j
= 0; j
< ncopies
; ++j
)
3525 /* Build argument list for the vectorized call. */
3527 vargs
.create (nargs
);
3531 for (i
= 0; i
< nargs
; i
++)
3533 unsigned int k
, l
, m
, o
;
3535 op
= gimple_call_arg (stmt
, i
);
3536 switch (bestn
->simdclone
->args
[i
].arg_type
)
3538 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3539 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3540 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3541 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3543 if (TYPE_VECTOR_SUBPARTS (atype
)
3544 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3546 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3547 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3548 / TYPE_VECTOR_SUBPARTS (atype
));
3549 gcc_assert ((k
& (k
- 1)) == 0);
3552 = vect_get_vec_def_for_operand (op
, stmt
);
3555 vec_oprnd0
= arginfo
[i
].op
;
3556 if ((m
& (k
- 1)) == 0)
3558 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3561 arginfo
[i
].op
= vec_oprnd0
;
3563 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3565 bitsize_int ((m
& (k
- 1)) * prec
));
3567 = gimple_build_assign (make_ssa_name (atype
),
3569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3570 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3574 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3575 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3576 gcc_assert ((k
& (k
- 1)) == 0);
3577 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3579 vec_alloc (ctor_elts
, k
);
3582 for (l
= 0; l
< k
; l
++)
3584 if (m
== 0 && l
== 0)
3586 = vect_get_vec_def_for_operand (op
, stmt
);
3589 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3591 arginfo
[i
].op
= vec_oprnd0
;
3594 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3598 vargs
.safe_push (vec_oprnd0
);
3601 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3603 = gimple_build_assign (make_ssa_name (atype
),
3605 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3606 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3611 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3612 vargs
.safe_push (op
);
3614 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3615 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3620 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3625 edge pe
= loop_preheader_edge (loop
);
3626 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3627 gcc_assert (!new_bb
);
3629 if (arginfo
[i
].simd_lane_linear
)
3631 vargs
.safe_push (arginfo
[i
].op
);
3634 tree phi_res
= copy_ssa_name (op
);
3635 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3636 set_vinfo_for_stmt (new_phi
,
3637 new_stmt_vec_info (new_phi
, loop_vinfo
));
3638 add_phi_arg (new_phi
, arginfo
[i
].op
,
3639 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3641 = POINTER_TYPE_P (TREE_TYPE (op
))
3642 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3643 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3644 ? sizetype
: TREE_TYPE (op
);
3646 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3648 tree tcst
= wide_int_to_tree (type
, cst
);
3649 tree phi_arg
= copy_ssa_name (op
);
3651 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3652 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3653 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3654 set_vinfo_for_stmt (new_stmt
,
3655 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3656 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3658 arginfo
[i
].op
= phi_res
;
3659 vargs
.safe_push (phi_res
);
3664 = POINTER_TYPE_P (TREE_TYPE (op
))
3665 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3666 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3667 ? sizetype
: TREE_TYPE (op
);
3669 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3671 tree tcst
= wide_int_to_tree (type
, cst
);
3672 new_temp
= make_ssa_name (TREE_TYPE (op
));
3673 new_stmt
= gimple_build_assign (new_temp
, code
,
3674 arginfo
[i
].op
, tcst
);
3675 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3676 vargs
.safe_push (new_temp
);
3679 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3680 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3681 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3682 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3683 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3684 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3690 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3693 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3695 new_temp
= create_tmp_var (ratype
);
3696 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3697 == TYPE_VECTOR_SUBPARTS (rtype
))
3698 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3700 new_temp
= make_ssa_name (rtype
, new_stmt
);
3701 gimple_call_set_lhs (new_stmt
, new_temp
);
3703 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3707 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3710 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3711 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3712 gcc_assert ((k
& (k
- 1)) == 0);
3713 for (l
= 0; l
< k
; l
++)
3718 t
= build_fold_addr_expr (new_temp
);
3719 t
= build2 (MEM_REF
, vectype
, t
,
3720 build_int_cst (TREE_TYPE (t
),
3721 l
* prec
/ BITS_PER_UNIT
));
3724 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3725 bitsize_int (prec
), bitsize_int (l
* prec
));
3727 = gimple_build_assign (make_ssa_name (vectype
), t
);
3728 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3729 if (j
== 0 && l
== 0)
3730 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3732 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3734 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3739 tree clobber
= build_constructor (ratype
, NULL
);
3740 TREE_THIS_VOLATILE (clobber
) = 1;
3741 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3742 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3746 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3748 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3749 / TYPE_VECTOR_SUBPARTS (rtype
));
3750 gcc_assert ((k
& (k
- 1)) == 0);
3751 if ((j
& (k
- 1)) == 0)
3752 vec_alloc (ret_ctor_elts
, k
);
3755 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3756 for (m
= 0; m
< o
; m
++)
3758 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3759 size_int (m
), NULL_TREE
, NULL_TREE
);
3761 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3762 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3763 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3764 gimple_assign_lhs (new_stmt
));
3766 tree clobber
= build_constructor (ratype
, NULL
);
3767 TREE_THIS_VOLATILE (clobber
) = 1;
3768 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3769 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3772 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3773 if ((j
& (k
- 1)) != k
- 1)
3775 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3777 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3778 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3780 if ((unsigned) j
== k
- 1)
3781 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3783 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3785 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3790 tree t
= build_fold_addr_expr (new_temp
);
3791 t
= build2 (MEM_REF
, vectype
, t
,
3792 build_int_cst (TREE_TYPE (t
), 0));
3794 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3795 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3796 tree clobber
= build_constructor (ratype
, NULL
);
3797 TREE_THIS_VOLATILE (clobber
) = 1;
3798 vect_finish_stmt_generation (stmt
,
3799 gimple_build_assign (new_temp
,
3805 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3809 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3814 /* The call in STMT might prevent it from being removed in dce.
3815 We however cannot remove it here, due to the way the ssa name
3816 it defines is mapped to the new definition. So just replace
3817 rhs of the statement with something harmless. */
3824 type
= TREE_TYPE (scalar_dest
);
3825 if (is_pattern_stmt_p (stmt_info
))
3826 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3828 lhs
= gimple_call_lhs (stmt
);
3829 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3832 new_stmt
= gimple_build_nop ();
3833 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3834 set_vinfo_for_stmt (stmt
, NULL
);
3835 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3836 gsi_replace (gsi
, new_stmt
, true);
3837 unlink_stmt_vdef (stmt
);
3843 /* Function vect_gen_widened_results_half
3845 Create a vector stmt whose code, type, number of arguments, and result
3846 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3847 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3848 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3849 needs to be created (DECL is a function-decl of a target-builtin).
3850 STMT is the original scalar stmt that we are vectorizing. */
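/* For example, widening a V8HI operand into V4SI results takes two calls
   to this function, one with the "lo" half code (e.g. VEC_UNPACK_LO_EXPR)
   and one with the "hi" half code, each producing half of the widened
   elements.  */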
3853 vect_gen_widened_results_half (enum tree_code code
,
3855 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3856 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3862 /* Generate half of the widened result: */
3863 if (code
== CALL_EXPR
)
3865 /* Target specific support */
3866 if (op_type
== binary_op
)
3867 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3869 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3870 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3871 gimple_call_set_lhs (new_stmt
, new_temp
);
3875 /* Generic support */
3876 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3877 if (op_type
!= binary_op
)
3879 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3880 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3881 gimple_assign_set_lhs (new_stmt
, new_temp
);
3883 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3889 /* Get vectorized definitions for loop-based vectorization. For the first
3890 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3891 scalar operand), and for the rest we get a copy with
3892 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3893 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3894 The vectors are collected into VEC_OPRNDS. */
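/* Sketch of the recursion: with MULTI_STEP_CVT == 1 this pushes four
   vector defs for OPRND (the initial def, its copy, and one more pair
   from the recursive call), which is what a two-step narrowing sequence
   consumes.  */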
3897 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3898 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3902 /* Get first vector operand. */
/* All the vector operands except the very first one (that is scalar oprnd)
   are stmt copies.  */
3905 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3906 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3908 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3910 vec_oprnds
->quick_push (vec_oprnd
);
3912 /* Get second vector operand. */
3913 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3914 vec_oprnds
->quick_push (vec_oprnd
);
/* For conversion in multiple steps, continue to get operands
   recursively.  */
3921 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3925 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
For multi-step conversions store the resulting vectors and call the function
recursively.  */
3930 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3931 int multi_step_cvt
, gimple
*stmt
,
3933 gimple_stmt_iterator
*gsi
,
3934 slp_tree slp_node
, enum tree_code code
,
3935 stmt_vec_info
*prev_stmt_info
)
3938 tree vop0
, vop1
, new_tmp
, vec_dest
;
3940 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3942 vec_dest
= vec_dsts
.pop ();
3944 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3946 /* Create demotion operation. */
3947 vop0
= (*vec_oprnds
)[i
];
3948 vop1
= (*vec_oprnds
)[i
+ 1];
3949 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3950 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3951 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3952 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3955 /* Store the resulting vector for next recursive call. */
3956 (*vec_oprnds
)[i
/2] = new_tmp
;
3959 /* This is the last step of the conversion sequence. Store the
3960 vectors in SLP_NODE or in vector info of the scalar statement
3961 (or in STMT_VINFO_RELATED_STMT chain). */
3963 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3966 if (!*prev_stmt_info
)
3967 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3969 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3971 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3976 /* For multi-step demotion operations we first generate demotion operations
3977 from the source type to the intermediate types, and then combine the
3978 results (stored in VEC_OPRNDS) in demotion operation to the destination
3982 /* At each level of recursion we have half of the operands we had at the
3984 vec_oprnds
->truncate ((i
+1)/2);
3985 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3986 stmt
, vec_dsts
, gsi
, slp_node
,
3987 VEC_PACK_TRUNC_EXPR
,
3991 vec_dsts
.quick_push (vec_dest
);
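/* Illustrative sketch, added for clarity and not part of the original
   source: a two-step demotion from int to char with four-element int
   vectors proceeds as

     step 1 (intermediate type short):
       vs0 = VEC_PACK_TRUNC_EXPR <vi0, vi1>;    // 8 shorts
       vs1 = VEC_PACK_TRUNC_EXPR <vi2, vi3>;
     step 2 (final type char):
       vc0 = VEC_PACK_TRUNC_EXPR <vs0, vs1>;    // 16 chars

   matching the recursion above, where each level halves the number of
   operands stored back into VEC_OPRNDS.  */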
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple *stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
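/* Illustrative sketch, added for clarity and not part of the original
   source: promoting one vector of eight shorts to ints produces two
   result vectors, one from each "half" generated above, e.g.

     vi_lo = VEC_UNPACK_LO_EXPR <vs0>;   // low four elements as int
     vi_hi = VEC_UNPACK_HI_EXPR <vs0>;   // high four elements as int

   so VEC_OPRNDS0 doubles in length on every promotion step.  */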
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
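/* Illustrative sketch, added for clarity and not part of the original
   source: a typical conversion handled here is

     int a[N];
     double b[N];
     for (i = 0; i < N; i++)
       b[i] = (double) a[i];   // FLOAT_EXPR, widening

   Depending on the relative widths of the input and output vector types,
   the code below uses the NONE, WIDEN or NARROW style of code
   generation.  */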
4053 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4054 gimple
**vec_stmt
, slp_tree slp_node
)
4058 tree op0
, op1
= NULL_TREE
;
4059 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4060 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4061 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4062 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4063 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4064 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4067 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4069 gimple
*new_stmt
= NULL
;
4070 stmt_vec_info prev_stmt_info
;
4073 tree vectype_out
, vectype_in
;
4075 tree lhs_type
, rhs_type
;
4076 enum { NARROW
, NONE
, WIDEN
} modifier
;
4077 vec
<tree
> vec_oprnds0
= vNULL
;
4078 vec
<tree
> vec_oprnds1
= vNULL
;
4080 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4081 vec_info
*vinfo
= stmt_info
->vinfo
;
4082 int multi_step_cvt
= 0;
4083 vec
<tree
> interm_types
= vNULL
;
4084 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4086 unsigned short fltsz
;
4088 /* Is STMT a vectorizable conversion? */
4090 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4093 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4097 if (!is_gimple_assign (stmt
))
4100 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4103 code
= gimple_assign_rhs_code (stmt
);
4104 if (!CONVERT_EXPR_CODE_P (code
)
4105 && code
!= FIX_TRUNC_EXPR
4106 && code
!= FLOAT_EXPR
4107 && code
!= WIDEN_MULT_EXPR
4108 && code
!= WIDEN_LSHIFT_EXPR
)
4111 op_type
= TREE_CODE_LENGTH (code
);
4113 /* Check types of lhs and rhs. */
4114 scalar_dest
= gimple_assign_lhs (stmt
);
4115 lhs_type
= TREE_TYPE (scalar_dest
);
4116 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4118 op0
= gimple_assign_rhs1 (stmt
);
4119 rhs_type
= TREE_TYPE (op0
);
4121 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4122 && !((INTEGRAL_TYPE_P (lhs_type
)
4123 && INTEGRAL_TYPE_P (rhs_type
))
4124 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4125 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4128 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4129 && ((INTEGRAL_TYPE_P (lhs_type
)
4130 && !type_has_mode_precision_p (lhs_type
))
4131 || (INTEGRAL_TYPE_P (rhs_type
)
4132 && !type_has_mode_precision_p (rhs_type
))))
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4136 "type conversion to/from bit-precision unsupported."
4141 /* Check the operands of the operation. */
4142 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4144 if (dump_enabled_p ())
4145 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4146 "use not simple.\n");
4149 if (op_type
== binary_op
)
4153 op1
= gimple_assign_rhs2 (stmt
);
4154 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4155 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4157 if (CONSTANT_CLASS_P (op0
))
4158 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4160 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4164 if (dump_enabled_p ())
4165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4166 "use not simple.\n");
4171 /* If op0 is an external or constant defs use a vector type of
4172 the same size as the output vector type. */
4174 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4176 gcc_assert (vectype_in
);
4179 if (dump_enabled_p ())
4181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4182 "no vectype for scalar type ");
4183 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4184 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4190 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4191 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4193 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4196 "can't convert between boolean and non "
4198 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4199 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4205 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4206 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4207 if (nunits_in
< nunits_out
)
4209 else if (nunits_out
== nunits_in
)
4214 /* Multiple types in SLP are handled by creating the appropriate number of
4215 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4219 else if (modifier
== NARROW
)
4220 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4222 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4224 /* Sanity check: make sure that at least one copy of the vectorized stmt
4225 needs to be generated. */
4226 gcc_assert (ncopies
>= 1);
4228 bool found_mode
= false;
4229 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4230 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4231 opt_scalar_mode rhs_mode_iter
;
4233 /* Supportable by target? */
4237 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4239 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4244 if (dump_enabled_p ())
4245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4246 "conversion not supported by target.\n");
4250 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4251 &code1
, &code2
, &multi_step_cvt
,
4254 /* Binary widening operation can only be supported directly by the
4256 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4260 if (code
!= FLOAT_EXPR
4261 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4264 fltsz
= GET_MODE_SIZE (lhs_mode
);
4265 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4267 rhs_mode
= rhs_mode_iter
.require ();
4268 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4272 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4273 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4274 if (cvt_type
== NULL_TREE
)
4277 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4279 if (!supportable_convert_operation (code
, vectype_out
,
4280 cvt_type
, &decl1
, &codecvt1
))
4283 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4284 cvt_type
, &codecvt1
,
4285 &codecvt2
, &multi_step_cvt
,
4289 gcc_assert (multi_step_cvt
== 0);
4291 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4292 vectype_in
, &code1
, &code2
,
4293 &multi_step_cvt
, &interm_types
))
4303 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4304 codecvt2
= ERROR_MARK
;
4308 interm_types
.safe_push (cvt_type
);
4309 cvt_type
= NULL_TREE
;
4314 gcc_assert (op_type
== unary_op
);
4315 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4316 &code1
, &multi_step_cvt
,
4320 if (code
!= FIX_TRUNC_EXPR
4321 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4325 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4326 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4327 if (cvt_type
== NULL_TREE
)
4329 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4332 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4333 &code1
, &multi_step_cvt
,
4342 if (!vec_stmt
) /* transformation not required. */
4344 if (dump_enabled_p ())
4345 dump_printf_loc (MSG_NOTE
, vect_location
,
4346 "=== vectorizable_conversion ===\n");
4347 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4349 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4350 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4352 else if (modifier
== NARROW
)
4354 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4355 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4359 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4360 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4362 interm_types
.release ();
4367 if (dump_enabled_p ())
4368 dump_printf_loc (MSG_NOTE
, vect_location
,
4369 "transform conversion. ncopies = %d.\n", ncopies
);
4371 if (op_type
== binary_op
)
4373 if (CONSTANT_CLASS_P (op0
))
4374 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4375 else if (CONSTANT_CLASS_P (op1
))
4376 op1
= fold_convert (TREE_TYPE (op0
), op1
);
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
4384 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4385 vec_dest
= vect_create_destination_var (scalar_dest
,
4386 (cvt_type
&& modifier
== WIDEN
)
4387 ? cvt_type
: vectype_out
);
4388 vec_dsts
.quick_push (vec_dest
);
4392 for (i
= interm_types
.length () - 1;
4393 interm_types
.iterate (i
, &intermediate_type
); i
--)
4395 vec_dest
= vect_create_destination_var (scalar_dest
,
4397 vec_dsts
.quick_push (vec_dest
);
4402 vec_dest
= vect_create_destination_var (scalar_dest
,
4404 ? vectype_out
: cvt_type
);
4408 if (modifier
== WIDEN
)
4410 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4411 if (op_type
== binary_op
)
4412 vec_oprnds1
.create (1);
4414 else if (modifier
== NARROW
)
4415 vec_oprnds0
.create (
4416 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4418 else if (code
== WIDEN_LSHIFT_EXPR
)
4419 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4422 prev_stmt_info
= NULL
;
4426 for (j
= 0; j
< ncopies
; j
++)
4429 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4431 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4433 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4435 /* Arguments are ready, create the new vector stmt. */
4436 if (code1
== CALL_EXPR
)
4438 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4439 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4440 gimple_call_set_lhs (new_stmt
, new_temp
);
4444 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4445 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4446 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4447 gimple_assign_set_lhs (new_stmt
, new_temp
);
4450 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4452 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4455 if (!prev_stmt_info
)
4456 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4458 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4459 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4466 /* In case the vectorization factor (VF) is bigger than the number
4467 of elements that we can fit in a vectype (nunits), we have to
4468 generate more than one vector stmt - i.e - we need to "unroll"
4469 the vector stmt by a factor VF/nunits. */
4470 for (j
= 0; j
< ncopies
; j
++)
4477 if (code
== WIDEN_LSHIFT_EXPR
)
4482 /* Store vec_oprnd1 for every vector stmt to be created
4483 for SLP_NODE. We check during the analysis that all
4484 the shift arguments are the same. */
4485 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4486 vec_oprnds1
.quick_push (vec_oprnd1
);
4488 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4492 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4493 &vec_oprnds1
, slp_node
);
4497 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4498 vec_oprnds0
.quick_push (vec_oprnd0
);
4499 if (op_type
== binary_op
)
4501 if (code
== WIDEN_LSHIFT_EXPR
)
4504 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4505 vec_oprnds1
.quick_push (vec_oprnd1
);
4511 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4512 vec_oprnds0
.truncate (0);
4513 vec_oprnds0
.quick_push (vec_oprnd0
);
4514 if (op_type
== binary_op
)
4516 if (code
== WIDEN_LSHIFT_EXPR
)
4519 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4521 vec_oprnds1
.truncate (0);
4522 vec_oprnds1
.quick_push (vec_oprnd1
);
4526 /* Arguments are ready. Create the new vector stmts. */
4527 for (i
= multi_step_cvt
; i
>= 0; i
--)
4529 tree this_dest
= vec_dsts
[i
];
4530 enum tree_code c1
= code1
, c2
= code2
;
4531 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4536 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4538 stmt
, this_dest
, gsi
,
4539 c1
, c2
, decl1
, decl2
,
4543 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4547 if (codecvt1
== CALL_EXPR
)
4549 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4550 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4551 gimple_call_set_lhs (new_stmt
, new_temp
);
4555 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4556 new_temp
= make_ssa_name (vec_dest
);
4557 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4561 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4564 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4567 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4570 if (!prev_stmt_info
)
4571 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4573 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4574 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4579 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4583 /* In case the vectorization factor (VF) is bigger than the number
4584 of elements that we can fit in a vectype (nunits), we have to
4585 generate more than one vector stmt - i.e - we need to "unroll"
4586 the vector stmt by a factor VF/nunits. */
4587 for (j
= 0; j
< ncopies
; j
++)
4591 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4595 vec_oprnds0
.truncate (0);
4596 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4597 vect_pow2 (multi_step_cvt
) - 1);
4600 /* Arguments are ready. Create the new vector stmts. */
4602 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4604 if (codecvt1
== CALL_EXPR
)
4606 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4607 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4608 gimple_call_set_lhs (new_stmt
, new_temp
);
4612 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4613 new_temp
= make_ssa_name (vec_dest
);
4614 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4618 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4619 vec_oprnds0
[i
] = new_temp
;
4622 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4623 stmt
, vec_dsts
, gsi
,
4628 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4632 vec_oprnds0
.release ();
4633 vec_oprnds1
.release ();
4634 interm_types
.release ();
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
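/* Illustrative sketch, added for clarity and not part of the original
   source: the typical statement handled here is a plain copy or a
   conversion that only reinterprets the bits, e.g.

     unsigned int u[N];
     int s[N];
     for (i = 0; i < N; i++)
       s[i] = (int) u[i];   // same width; becomes a VIEW_CONVERT_EXPR copy

   which vectorizes into one vector copy per copy of the statement.  */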
4648 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4649 gimple
**vec_stmt
, slp_tree slp_node
)
4654 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4655 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4658 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4662 vec
<tree
> vec_oprnds
= vNULL
;
4664 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4665 vec_info
*vinfo
= stmt_info
->vinfo
;
4666 gimple
*new_stmt
= NULL
;
4667 stmt_vec_info prev_stmt_info
= NULL
;
4668 enum tree_code code
;
4671 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4674 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4678 /* Is vectorizable assignment? */
4679 if (!is_gimple_assign (stmt
))
4682 scalar_dest
= gimple_assign_lhs (stmt
);
4683 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4686 code
= gimple_assign_rhs_code (stmt
);
4687 if (gimple_assign_single_p (stmt
)
4688 || code
== PAREN_EXPR
4689 || CONVERT_EXPR_CODE_P (code
))
4690 op
= gimple_assign_rhs1 (stmt
);
4694 if (code
== VIEW_CONVERT_EXPR
)
4695 op
= TREE_OPERAND (op
, 0);
4697 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4698 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4700 /* Multiple types in SLP are handled by creating the appropriate number of
4701 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4706 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4708 gcc_assert (ncopies
>= 1);
4710 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4714 "use not simple.\n");
4718 /* We can handle NOP_EXPR conversions that do not change the number
4719 of elements or the vector size. */
4720 if ((CONVERT_EXPR_CODE_P (code
)
4721 || code
== VIEW_CONVERT_EXPR
)
4723 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4724 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4725 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4728 /* We do not handle bit-precision changes. */
4729 if ((CONVERT_EXPR_CODE_P (code
)
4730 || code
== VIEW_CONVERT_EXPR
)
4731 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4732 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
4733 || !type_has_mode_precision_p (TREE_TYPE (op
)))
4734 /* But a conversion that does not change the bit-pattern is ok. */
4735 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4736 > TYPE_PRECISION (TREE_TYPE (op
)))
4737 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4738 /* Conversion between boolean types of different sizes is
4739 a simple assignment in case their vectypes are same
4741 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4742 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4746 "type conversion to/from bit-precision "
4751 if (!vec_stmt
) /* transformation not required. */
4753 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_NOTE
, vect_location
,
4756 "=== vectorizable_assignment ===\n");
4757 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4766 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4769 for (j
= 0; j
< ncopies
; j
++)
4773 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
4775 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4777 /* Arguments are ready. create the new vector stmt. */
4778 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4780 if (CONVERT_EXPR_CODE_P (code
)
4781 || code
== VIEW_CONVERT_EXPR
)
4782 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4783 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4784 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4785 gimple_assign_set_lhs (new_stmt
, new_temp
);
4786 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4788 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4795 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4797 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4799 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4802 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
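/* Illustrative sketch, added for clarity and not part of the original
   source: the two optab queries above correspond to the two forms of a
   vectorizable shift,

     for (i = 0; i < N; i++)
       a[i] = a[i] << 3;      // invariant amount: vector-by-scalar shift
     for (i = 0; i < N; i++)
       a[i] = a[i] << b[i];   // per-element amounts: vector-by-vector shift

   and the shift is reported as supportable if the target implements
   either form for the chosen vector mode.  */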
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4851 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4852 gimple
**vec_stmt
, slp_tree slp_node
)
4856 tree op0
, op1
= NULL
;
4857 tree vec_oprnd1
= NULL_TREE
;
4858 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4860 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4861 enum tree_code code
;
4862 machine_mode vec_mode
;
4866 machine_mode optab_op2_mode
;
4868 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4870 gimple
*new_stmt
= NULL
;
4871 stmt_vec_info prev_stmt_info
;
4878 vec
<tree
> vec_oprnds0
= vNULL
;
4879 vec
<tree
> vec_oprnds1
= vNULL
;
4882 bool scalar_shift_arg
= true;
4883 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4884 vec_info
*vinfo
= stmt_info
->vinfo
;
4886 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4889 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4893 /* Is STMT a vectorizable binary/unary operation? */
4894 if (!is_gimple_assign (stmt
))
4897 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4900 code
= gimple_assign_rhs_code (stmt
);
4902 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4903 || code
== RROTATE_EXPR
))
4906 scalar_dest
= gimple_assign_lhs (stmt
);
4907 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4908 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
4910 if (dump_enabled_p ())
4911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4912 "bit-precision shifts not supported.\n");
4916 op0
= gimple_assign_rhs1 (stmt
);
4917 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4919 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4921 "use not simple.\n");
4924 /* If op0 is an external or constant def use a vector type with
4925 the same size as the output vector type. */
4927 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4929 gcc_assert (vectype
);
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4934 "no vectype for scalar type\n");
4938 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4939 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4940 if (nunits_out
!= nunits_in
)
4943 op1
= gimple_assign_rhs2 (stmt
);
4944 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4946 if (dump_enabled_p ())
4947 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4948 "use not simple.\n");
4952 /* Multiple types in SLP are handled by creating the appropriate number of
4953 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4958 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4960 gcc_assert (ncopies
>= 1);
4962 /* Determine whether the shift amount is a vector, or scalar. If the
4963 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4965 if ((dt
[1] == vect_internal_def
4966 || dt
[1] == vect_induction_def
)
4968 scalar_shift_arg
= false;
4969 else if (dt
[1] == vect_constant_def
4970 || dt
[1] == vect_external_def
4971 || dt
[1] == vect_internal_def
)
4973 /* In SLP, need to check whether the shift count is the same,
4974 in loops if it is a constant or invariant, it is always
4978 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4981 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4982 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4983 scalar_shift_arg
= false;
4986 /* If the shift amount is computed by a pattern stmt we cannot
4987 use the scalar amount directly thus give up and use a vector
4989 if (dt
[1] == vect_internal_def
)
4991 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4992 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4993 scalar_shift_arg
= false;
4998 if (dump_enabled_p ())
4999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5000 "operand mode requires invariant argument.\n");
5004 /* Vector shifted by vector. */
5005 if (!scalar_shift_arg
)
5007 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE
, vect_location
,
5010 "vector/vector shift/rotate found.\n");
5013 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5014 if (op1_vectype
== NULL_TREE
5015 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5019 "unusable type for last operand in"
5020 " vector/vector shift/rotate.\n");
5024 /* See if the machine has a vector shifted by scalar insn and if not
5025 then see if it has a vector shifted by vector insn. */
5028 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5030 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_NOTE
, vect_location
,
5034 "vector/scalar shift/rotate found.\n");
5038 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5040 && (optab_handler (optab
, TYPE_MODE (vectype
))
5041 != CODE_FOR_nothing
))
5043 scalar_shift_arg
= false;
5045 if (dump_enabled_p ())
5046 dump_printf_loc (MSG_NOTE
, vect_location
,
5047 "vector/vector shift/rotate found.\n");
5049 /* Unlike the other binary operators, shifts/rotates have
5050 the rhs being int, instead of the same type as the lhs,
5051 so make sure the scalar is the right type if we are
5052 dealing with vectors of long long/long/short/char. */
5053 if (dt
[1] == vect_constant_def
)
5054 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5055 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5059 && TYPE_MODE (TREE_TYPE (vectype
))
5060 != TYPE_MODE (TREE_TYPE (op1
)))
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5064 "unusable type for last operand in"
5065 " vector/vector shift/rotate.\n");
5068 if (vec_stmt
&& !slp_node
)
5070 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5071 op1
= vect_init_vector (stmt
, op1
,
5072 TREE_TYPE (vectype
), NULL
);
5079 /* Supportable by target? */
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5087 vec_mode
= TYPE_MODE (vectype
);
5088 icode
= (int) optab_handler (optab
, vec_mode
);
5089 if (icode
== CODE_FOR_nothing
)
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5093 "op not supported by target.\n");
5094 /* Check only during analysis. */
5095 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5097 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_NOTE
, vect_location
,
5101 "proceeding using word mode.\n");
5104 /* Worthwhile without SIMD support? Check only during analysis. */
5106 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5107 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5109 if (dump_enabled_p ())
5110 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5111 "not worthwhile without SIMD support.\n");
5115 if (!vec_stmt
) /* transformation not required. */
5117 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_NOTE
, vect_location
,
5120 "=== vectorizable_shift ===\n");
5121 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_NOTE
, vect_location
,
5129 "transform binary/unary operation.\n");
5132 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5134 prev_stmt_info
= NULL
;
5135 for (j
= 0; j
< ncopies
; j
++)
5140 if (scalar_shift_arg
)
5142 /* Vector shl and shr insn patterns can be defined with scalar
5143 operand 2 (shift operand). In this case, use constant or loop
5144 invariant op1 directly, without extending it to vector mode
5146 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5147 if (!VECTOR_MODE_P (optab_op2_mode
))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_NOTE
, vect_location
,
5151 "operand 1 using scalar mode.\n");
5153 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5154 vec_oprnds1
.quick_push (vec_oprnd1
);
5157 /* Store vec_oprnd1 for every vector stmt to be created
5158 for SLP_NODE. We check during the analysis that all
5159 the shift arguments are the same.
5160 TODO: Allow different constants for different vector
5161 stmts generated for an SLP instance. */
5162 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5163 vec_oprnds1
.quick_push (vec_oprnd1
);
5168 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5169 (a special case for certain kind of vector shifts); otherwise,
5170 operand 1 should be of a vector type (the usual case). */
5172 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5175 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5179 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5181 /* Arguments are ready. Create the new vector stmt. */
5182 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5184 vop1
= vec_oprnds1
[i
];
5185 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5186 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5187 gimple_assign_set_lhs (new_stmt
, new_temp
);
5188 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5190 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5197 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5199 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5200 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5203 vec_oprnds0
.release ();
5204 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5219 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5220 gimple
**vec_stmt
, slp_tree slp_node
)
5224 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5225 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5227 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5228 enum tree_code code
;
5229 machine_mode vec_mode
;
5233 bool target_support_p
;
5235 enum vect_def_type dt
[3]
5236 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5238 gimple
*new_stmt
= NULL
;
5239 stmt_vec_info prev_stmt_info
;
5245 vec
<tree
> vec_oprnds0
= vNULL
;
5246 vec
<tree
> vec_oprnds1
= vNULL
;
5247 vec
<tree
> vec_oprnds2
= vNULL
;
5248 tree vop0
, vop1
, vop2
;
5249 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5250 vec_info
*vinfo
= stmt_info
->vinfo
;
5252 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5255 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5259 /* Is STMT a vectorizable binary/unary operation? */
5260 if (!is_gimple_assign (stmt
))
5263 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5266 code
= gimple_assign_rhs_code (stmt
);
5268 /* For pointer addition, we should use the normal plus for
5269 the vector addition. */
5270 if (code
== POINTER_PLUS_EXPR
)
5273 /* Support only unary or binary operations. */
5274 op_type
= TREE_CODE_LENGTH (code
);
5275 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5277 if (dump_enabled_p ())
5278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5279 "num. args = %d (not unary/binary/ternary op).\n",
5284 scalar_dest
= gimple_assign_lhs (stmt
);
5285 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5287 /* Most operations cannot handle bit-precision types without extra
5289 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5290 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5291 /* Exception are bitwise binary operations. */
5292 && code
!= BIT_IOR_EXPR
5293 && code
!= BIT_XOR_EXPR
5294 && code
!= BIT_AND_EXPR
)
5296 if (dump_enabled_p ())
5297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5298 "bit-precision arithmetic not supported.\n");
5302 op0
= gimple_assign_rhs1 (stmt
);
5303 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5305 if (dump_enabled_p ())
5306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5307 "use not simple.\n");
5310 /* If op0 is an external or constant def use a vector type with
5311 the same size as the output vector type. */
5314 /* For boolean type we cannot determine vectype by
5315 invariant value (don't know whether it is a vector
5316 of booleans or vector of integers). We use output
5317 vectype because operations on boolean don't change
5319 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5321 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5323 if (dump_enabled_p ())
5324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5325 "not supported operation on bool value.\n");
5328 vectype
= vectype_out
;
5331 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5334 gcc_assert (vectype
);
5337 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5340 "no vectype for scalar type ");
5341 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5343 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5349 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5350 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5351 if (nunits_out
!= nunits_in
)
5354 if (op_type
== binary_op
|| op_type
== ternary_op
)
5356 op1
= gimple_assign_rhs2 (stmt
);
5357 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5361 "use not simple.\n");
5365 if (op_type
== ternary_op
)
5367 op2
= gimple_assign_rhs3 (stmt
);
5368 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5370 if (dump_enabled_p ())
5371 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5372 "use not simple.\n");
5377 /* Multiple types in SLP are handled by creating the appropriate number of
5378 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5383 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5385 gcc_assert (ncopies
>= 1);
5387 /* Shifts are handled in vectorizable_shift (). */
5388 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5389 || code
== RROTATE_EXPR
)
5392 /* Supportable by target? */
5394 vec_mode
= TYPE_MODE (vectype
);
5395 if (code
== MULT_HIGHPART_EXPR
)
5396 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5399 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5402 if (dump_enabled_p ())
5403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5407 target_support_p
= (optab_handler (optab
, vec_mode
)
5408 != CODE_FOR_nothing
);
5411 if (!target_support_p
)
5413 if (dump_enabled_p ())
5414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5415 "op not supported by target.\n");
5416 /* Check only during analysis. */
5417 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5418 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5420 if (dump_enabled_p ())
5421 dump_printf_loc (MSG_NOTE
, vect_location
,
5422 "proceeding using word mode.\n");
5425 /* Worthwhile without SIMD support? Check only during analysis. */
5426 if (!VECTOR_MODE_P (vec_mode
)
5428 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5432 "not worthwhile without SIMD support.\n");
5436 if (!vec_stmt
) /* transformation not required. */
5438 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_NOTE
, vect_location
,
5441 "=== vectorizable_operation ===\n");
5442 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_NOTE
, vect_location
,
5450 "transform binary/unary operation.\n");
5453 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
5508 prev_stmt_info
= NULL
;
5509 for (j
= 0; j
< ncopies
; j
++)
5514 if (op_type
== binary_op
|| op_type
== ternary_op
)
5515 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5518 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5520 if (op_type
== ternary_op
)
5521 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5526 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5527 if (op_type
== ternary_op
)
5529 tree vec_oprnd
= vec_oprnds2
.pop ();
5530 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5535 /* Arguments are ready. Create the new vector stmt. */
5536 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5538 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5539 ? vec_oprnds1
[i
] : NULL_TREE
);
5540 vop2
= ((op_type
== ternary_op
)
5541 ? vec_oprnds2
[i
] : NULL_TREE
);
5542 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5543 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5544 gimple_assign_set_lhs (new_stmt
, new_temp
);
5545 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5547 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5554 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5556 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5557 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5560 vec_oprnds0
.release ();
5561 vec_oprnds1
.release ();
5562 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR's base alignment.  */

static void
ensure_base_align (struct data_reference *dr)
{
  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}
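/* Illustrative sketch, added for clarity and not part of the original
   source: for a file-scope array such as

     static double a[256];   // default alignment may be only 8 bytes

   with a DR_TARGET_ALIGNMENT of 32 bytes, the helper above raises the
   decl's alignment (through the symbol table for symtab decls) so that
   aligned vector accesses can be used instead of unaligned ones.  */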
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT.  */

static tree
get_group_alias_ptr_type (gimple *first_stmt)
{
  struct data_reference *first_dr, *next_dr;
  gimple *next_stmt;

  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
  while (next_stmt)
    {
      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
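/* Illustrative sketch, added for clarity and not part of the original
   source: for a grouped access such as

     struct pair { int x; int y; } *p;
     for (i = 0; i < N; i++)
       { p[i].x = 0; p[i].y = 0; }

   both members share an alias set, so the alias pointer type of the first
   data reference is used for the whole group; if the members disagreed,
   ptr_type_node would be returned as a safe fallback.  */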
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5631 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5637 tree vec_oprnd
= NULL_TREE
;
5638 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5639 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5641 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5642 struct loop
*loop
= NULL
;
5643 machine_mode vec_mode
;
5645 enum dr_alignment_support alignment_support_scheme
;
5647 enum vect_def_type dt
;
5648 stmt_vec_info prev_stmt_info
= NULL
;
5649 tree dataref_ptr
= NULL_TREE
;
5650 tree dataref_offset
= NULL_TREE
;
5651 gimple
*ptr_incr
= NULL
;
5654 gimple
*next_stmt
, *first_stmt
;
5656 unsigned int group_size
, i
;
5657 vec
<tree
> oprnds
= vNULL
;
5658 vec
<tree
> result_chain
= vNULL
;
5660 tree offset
= NULL_TREE
;
5661 vec
<tree
> vec_oprnds
= vNULL
;
5662 bool slp
= (slp_node
!= NULL
);
5663 unsigned int vec_num
;
5664 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5665 vec_info
*vinfo
= stmt_info
->vinfo
;
5667 gather_scatter_info gs_info
;
5668 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5671 vec_load_store_type vls_type
;
5674 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5677 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5681 /* Is vectorizable store? */
5683 if (!is_gimple_assign (stmt
))
5686 scalar_dest
= gimple_assign_lhs (stmt
);
5687 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5688 && is_pattern_stmt_p (stmt_info
))
5689 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5690 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5691 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5692 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5693 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5694 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5695 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5696 && TREE_CODE (scalar_dest
) != MEM_REF
)
5699 /* Cannot have hybrid store SLP -- that would mean storing to the
5700 same location twice. */
5701 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5703 gcc_assert (gimple_assign_single_p (stmt
));
5705 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5706 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5710 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5711 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5716 /* Multiple types in SLP are handled by creating the appropriate number of
5717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5722 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5724 gcc_assert (ncopies
>= 1);
5726 /* FORNOW. This restriction should be relaxed. */
5727 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5731 "multiple types in nested loop.\n");
5735 op
= gimple_assign_rhs1 (stmt
);
5737 /* In the case this is a store from a constant make sure
5738 native_encode_expr can handle it. */
5739 if (CONSTANT_CLASS_P (op
) && native_encode_expr (op
, NULL
, 64) == 0)
5742 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5744 if (dump_enabled_p ())
5745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5746 "use not simple.\n");
5750 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5751 vls_type
= VLS_STORE_INVARIANT
;
5753 vls_type
= VLS_STORE
;
5755 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5758 elem_type
= TREE_TYPE (vectype
);
5759 vec_mode
= TYPE_MODE (vectype
);
5761 /* FORNOW. In some cases can vectorize even if data-type not supported
5762 (e.g. - array initialization with 0). */
5763 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5766 if (!STMT_VINFO_DATA_REF (stmt_info
))
5769 vect_memory_access_type memory_access_type
;
5770 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5771 &memory_access_type
, &gs_info
))
5774 if (!vec_stmt
) /* transformation not required. */
5776 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5777 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5778 /* The SLP costs are calculated during SLP analysis. */
5779 if (!PURE_SLP_STMT (stmt_info
))
5780 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5784 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5788 ensure_base_align (dr
);
5790 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5792 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5793 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5794 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5795 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5796 edge pe
= loop_preheader_edge (loop
);
5799 enum { NARROW
, NONE
, WIDEN
} modifier
;
5800 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5802 if (nunits
== (unsigned int) scatter_off_nunits
)
5804 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5808 auto_vec_perm_indices
sel (scatter_off_nunits
);
5809 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5810 sel
.quick_push (i
| nunits
);
5812 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5813 gcc_assert (perm_mask
!= NULL_TREE
);
5815 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5819 auto_vec_perm_indices
sel (nunits
);
5820 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5821 sel
.quick_push (i
| scatter_off_nunits
);
5823 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5824 gcc_assert (perm_mask
!= NULL_TREE
);
5830 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5831 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5832 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5833 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5834 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5835 scaletype
= TREE_VALUE (arglist
);
5837 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5838 && TREE_CODE (rettype
) == VOID_TYPE
);
5840 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5841 if (!is_gimple_min_invariant (ptr
))
5843 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5844 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5845 gcc_assert (!new_bb
);
5848 /* Currently we support only unconditional scatter stores,
5849 so mask should be all ones. */
5850 mask
= build_int_cst (masktype
, -1);
5851 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5853 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5855 prev_stmt_info
= NULL
;
5856 for (j
= 0; j
< ncopies
; ++j
)
5861 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5863 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5865 else if (modifier
!= NONE
&& (j
& 1))
5867 if (modifier
== WIDEN
)
5870 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5871 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5874 else if (modifier
== NARROW
)
5876 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5879 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5888 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5890 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5894 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5896 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5897 == TYPE_VECTOR_SUBPARTS (srctype
));
5898 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5899 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5900 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5901 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5905 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5907 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5908 == TYPE_VECTOR_SUBPARTS (idxtype
));
5909 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5910 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5911 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5912 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5917 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5919 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5921 if (prev_stmt_info
== NULL
)
5922 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5924 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5925 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5930 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5933 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5934 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5935 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5937 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5940 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5942 /* We vectorize all the stmts of the interleaving group when we
5943 reach the last stmt in the group. */
5944 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5945 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5954 grouped_store
= false;
5955 /* VEC_NUM is the number of vect stmts to be created for this
5957 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5958 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5959 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5960 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5961 op
= gimple_assign_rhs1 (first_stmt
);
5964 /* VEC_NUM is the number of vect stmts to be created for this
5966 vec_num
= group_size
;
5968 ref_type
= get_group_alias_ptr_type (first_stmt
);
5974 group_size
= vec_num
= 1;
5975 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5978 if (dump_enabled_p ())
5979 dump_printf_loc (MSG_NOTE
, vect_location
,
5980 "transform store. ncopies = %d\n", ncopies
);
5982 if (memory_access_type
== VMAT_ELEMENTWISE
5983 || memory_access_type
== VMAT_STRIDED_SLP
)
5985 gimple_stmt_iterator incr_gsi
;
5991 gimple_seq stmts
= NULL
;
5992 tree stride_base
, stride_step
, alias_off
;
5996 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5999 = fold_build_pointer_plus
6000 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6001 size_binop (PLUS_EXPR
,
6002 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6003 convert_to_ptrofftype (DR_INIT (first_dr
))));
6004 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
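      /* Illustrative instance, added for clarity and not part of the
	 original source: with a V4SI rhs and stride 3 the scheme above
	 expands to

	   for (j = 0; ; j += 4*3)
	     {
	       vectemp = ...;            // one vector of four ints
	       array[j]     = vectemp[0];
	       array[j+3]   = vectemp[1];
	       array[j+6]   = vectemp[2];
	       array[j+9]   = vectemp[3];
	     }

	 i.e. NSTORES scalar extracts and stores per vector statement.  */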
6024 unsigned nstores
= nunits
;
6026 tree ltype
= elem_type
;
6027 tree lvectype
= vectype
;
6030 if (group_size
< nunits
6031 && nunits
% group_size
== 0)
6033 nstores
= nunits
/ group_size
;
6035 ltype
= build_vector_type (elem_type
, group_size
);
6038 /* First check if vec_extract optab doesn't support extraction
6039 of vector elts directly. */
6040 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6042 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6043 || !VECTOR_MODE_P (vmode
)
6044 || (convert_optab_handler (vec_extract_optab
,
6045 TYPE_MODE (vectype
), vmode
)
6046 == CODE_FOR_nothing
))
6048 /* Try to avoid emitting an extract of vector elements
6049 by performing the extracts using an integer type of the
6050 same size, extracting from a vector of those and then
6051 re-interpreting it as the original vector type if
6054 = group_size
* GET_MODE_BITSIZE (elmode
);
6055 elmode
= int_mode_for_size (lsize
, 0).require ();
6056 /* If we can't construct such a vector fall back to
6057 element extracts from the original vector type and
6058 element size stores. */
6059 if (mode_for_vector (elmode
,
6060 nunits
/ group_size
).exists (&vmode
)
6061 && VECTOR_MODE_P (vmode
)
6062 && (convert_optab_handler (vec_extract_optab
,
6064 != CODE_FOR_nothing
))
6066 nstores
= nunits
/ group_size
;
6068 ltype
= build_nonstandard_integer_type (lsize
, 1);
6069 lvectype
= build_vector_type (ltype
, nstores
);
6071 /* Else fall back to vector extraction anyway.
6072 Fewer stores are more important than avoiding spilling
6073 of the vector we extract from. Compared to the
6074 construction case in vectorizable_load no store-forwarding
6075 issue exists here for reasonable archs. */
6078 else if (group_size
>= nunits
6079 && group_size
% nunits
== 0)
6086 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6087 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6090 ivstep
= stride_step
;
6091 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6092 build_int_cst (TREE_TYPE (ivstep
), vf
));
6094 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6096 create_iv (stride_base
, ivstep
, NULL
,
6097 loop
, &incr_gsi
, insert_after
,
6099 incr
= gsi_stmt (incr_gsi
);
6100 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6102 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6104 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6106 prev_stmt_info
= NULL
;
6107 alias_off
= build_int_cst (ref_type
, 0);
6108 next_stmt
= first_stmt
;
6109 for (g
= 0; g
< group_size
; g
++)
6111 running_off
= offvar
;
6114 tree size
= TYPE_SIZE_UNIT (ltype
);
6115 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6117 tree newoff
= copy_ssa_name (running_off
, NULL
);
6118 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6120 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6121 running_off
= newoff
;
6123 unsigned int group_el
= 0;
6124 unsigned HOST_WIDE_INT
6125 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6126 for (j
= 0; j
< ncopies
; j
++)
6128 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6129 and first_stmt == stmt. */
6134 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6136 vec_oprnd
= vec_oprnds
[0];
6140 gcc_assert (gimple_assign_single_p (next_stmt
));
6141 op
= gimple_assign_rhs1 (next_stmt
);
6142 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6148 vec_oprnd
= vec_oprnds
[j
];
6151 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6152 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6155 /* Pun the vector to extract from if necessary. */
6156 if (lvectype
!= vectype
)
6158 tree tem
= make_ssa_name (lvectype
);
6160 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6161 lvectype
, vec_oprnd
));
6162 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6165 for (i
= 0; i
< nstores
; i
++)
6167 tree newref
, newoff
;
6168 gimple
*incr
, *assign
;
6169 tree size
= TYPE_SIZE (ltype
);
6170 /* Extract the i'th component. */
6171 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6172 bitsize_int (i
), size
);
6173 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6176 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6180 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6182 newref
= build2 (MEM_REF
, ltype
,
6183 running_off
, this_off
);
6185 /* And store it to *running_off. */
6186 assign
= gimple_build_assign (newref
, elem
);
6187 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6191 || group_el
== group_size
)
6193 newoff
= copy_ssa_name (running_off
, NULL
);
6194 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6195 running_off
, stride_step
);
6196 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6198 running_off
= newoff
;
6201 if (g
== group_size
- 1
6204 if (j
== 0 && i
== 0)
6205 STMT_VINFO_VEC_STMT (stmt_info
)
6206 = *vec_stmt
= assign
;
6208 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6209 prev_stmt_info
= vinfo_for_stmt (assign
);
6213 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6218 vec_oprnds
.release ();
6222 auto_vec
<tree
> dr_chain (group_size
);
6223 oprnds
.create (group_size
);
6225 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6226 gcc_assert (alignment_support_scheme
);
6227 /* Targets with store-lane instructions must not require explicit
6229 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6230 || alignment_support_scheme
== dr_aligned
6231 || alignment_support_scheme
== dr_unaligned_supported
);
6233 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6234 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6235 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6237 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6238 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6240 aggr_type
= vectype
;
6242 /* In case the vectorization factor (VF) is bigger than the number
6243 of elements that we can fit in a vectype (nunits), we have to generate
6244 more than one vector stmt - i.e - we need to "unroll" the
6245 vector stmt by a factor VF/nunits. For more details see documentation in
6246 vect_get_vec_def_for_copy_stmt. */
6248 /* In case of interleaving (non-unit grouped access):
6255 We create vectorized stores starting from base address (the access of the
6256 first stmt in the chain (S2 in the above example), when the last store stmt
6257 of the chain (S4) is reached:
6260 VS2: &base + vec_size*1 = vx0
6261 VS3: &base + vec_size*2 = vx1
6262 VS4: &base + vec_size*3 = vx3
6264 Then permutation statements are generated:
6266 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6267 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6270 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6271 (the order of the data-refs in the output of vect_permute_store_chain
6272 corresponds to the order of scalar stmts in the interleaving chain - see
6273 the documentation of vect_permute_store_chain()).
6275 In case of both multiple types and interleaving, above vector stores and
6276 permutation stmts are created for every copy. The result vector stmts are
6277 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6278 STMT_VINFO_RELATED_STMT for the next copies.
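/* To make the masks above concrete (a worked example with hypothetical
   element values): for group_size == 2 and 8-element vectors, if
   vx0 = {a0,a1,...,a7} and vx3 = {b0,b1,...,b7}, then
     vx5 = VEC_PERM_EXPR <vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11}>
         = {a0,b0,a1,b1,a2,b2,a3,b3}
     vx6 = VEC_PERM_EXPR <vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15}>
         = {a4,b4,a5,b5,a6,b6,a7,b7}
   i.e. the two input vectors are interleaved element-wise, which is
   exactly the memory order of the two interleaved scalar stores.  */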
6281 prev_stmt_info
= NULL
;
6282 for (j
= 0; j
< ncopies
; j
++)
6289 /* Get vectorized arguments for SLP_NODE. */
6290 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6293 vec_oprnd
= vec_oprnds
[0];
6297 /* For interleaved stores we collect vectorized defs for all the
6298 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6299 used as an input to vect_permute_store_chain(), and OPRNDS as
6300 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6302 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6303 OPRNDS are of size 1. */
6304 next_stmt
= first_stmt
;
6305 for (i
= 0; i
< group_size
; i
++)
6307 /* Since gaps are not supported for interleaved stores,
6308 GROUP_SIZE is the exact number of stmts in the chain.
6309 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6310 there is no interleaving, GROUP_SIZE is 1, and only one
6311 iteration of the loop will be executed. */
6312 gcc_assert (next_stmt
6313 && gimple_assign_single_p (next_stmt
));
6314 op
= gimple_assign_rhs1 (next_stmt
);
6316 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6317 dr_chain
.quick_push (vec_oprnd
);
6318 oprnds
.quick_push (vec_oprnd
);
6319 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6323 /* We should have caught mismatched types earlier. */
6324 gcc_assert (useless_type_conversion_p (vectype
,
6325 TREE_TYPE (vec_oprnd
)));
6326 bool simd_lane_access_p
6327 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6328 if (simd_lane_access_p
6329 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6330 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6331 && integer_zerop (DR_OFFSET (first_dr
))
6332 && integer_zerop (DR_INIT (first_dr
))
6333 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6334 get_alias_set (TREE_TYPE (ref_type
))))
6336 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6337 dataref_offset
= build_int_cst (ref_type
, 0);
6342 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6343 simd_lane_access_p
? loop
: NULL
,
6344 offset
, &dummy
, gsi
, &ptr_incr
,
6345 simd_lane_access_p
, &inv_p
);
6346 gcc_assert (bb_vinfo
|| !inv_p
);
6350 /* For interleaved stores we created vectorized defs for all the
6351 defs stored in OPRNDS in the previous iteration (previous copy).
6352 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6353 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6355 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6356 OPRNDS are of size 1. */
6357 for (i
= 0; i
< group_size
; i
++)
6360 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6361 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6362 dr_chain
[i
] = vec_oprnd
;
6363 oprnds
[i
] = vec_oprnd
;
6367 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6368 TYPE_SIZE_UNIT (aggr_type
));
6370 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6371 TYPE_SIZE_UNIT (aggr_type
));
6374 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6378 /* Combine all the vectors into an array. */
6379 vec_array
= create_vector_array (vectype
, vec_num
);
6380 for (i
= 0; i
< vec_num
; i
++)
6382 vec_oprnd
= dr_chain
[i
];
6383 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6387 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6388 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6389 gcall
*call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6391 gimple_call_set_lhs (call
, data_ref
);
6392 gimple_call_set_nothrow (call
, true);
6394 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6402 result_chain
.create (group_size
);
6404 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6408 next_stmt
= first_stmt
;
6409 for (i
= 0; i
< vec_num
; i
++)
6411 unsigned align
, misalign
;
6414 /* Bump the vector pointer. */
6415 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6419 vec_oprnd
= vec_oprnds
[i
];
6420 else if (grouped_store
)
6421 /* For grouped stores vectorized defs are interleaved in
6422 vect_permute_store_chain(). */
6423 vec_oprnd
= result_chain
[i
];
6425 data_ref
= fold_build2 (MEM_REF
, vectype
,
6429 : build_int_cst (ref_type
, 0));
6430 align
= DR_TARGET_ALIGNMENT (first_dr
);
6431 if (aligned_access_p (first_dr
))
6433 else if (DR_MISALIGNMENT (first_dr
) == -1)
6435 align
= dr_alignment (vect_dr_behavior (first_dr
));
6437 TREE_TYPE (data_ref
)
6438 = build_aligned_type (TREE_TYPE (data_ref
),
6439 align
* BITS_PER_UNIT
);
6443 TREE_TYPE (data_ref
)
6444 = build_aligned_type (TREE_TYPE (data_ref
),
6445 TYPE_ALIGN (elem_type
));
6446 misalign
= DR_MISALIGNMENT (first_dr
);
6448 if (dataref_offset
== NULL_TREE
6449 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6450 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6453 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6455 tree perm_mask
= perm_mask_for_reverse (vectype
);
6457 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6459 tree new_temp
= make_ssa_name (perm_dest
);
6461 /* Generate the permute statement. */
6463 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6464 vec_oprnd
, perm_mask
);
6465 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6467 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6468 vec_oprnd
= new_temp
;
6471 /* Arguments are ready. Create the new vector stmt. */
6472 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6473 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6478 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6486 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6488 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6489 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6494 result_chain
.release ();
6495 vec_oprnds
.release ();
6500 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6501 VECTOR_CST mask. No checks are made that the target platform supports the
6502 mask, so callers may wish to test can_vec_perm_p separately, or use
6503 vect_gen_perm_mask_checked. */
6506 vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
6508 tree mask_elt_type, mask_type, mask_vec;
6510 unsigned int nunits = sel.length ();
6511 gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6513 mask_elt_type = lang_hooks.types.type_for_mode
6514 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
6515 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6517 auto_vec<tree, 32> mask_elts (nunits);
6518 for (unsigned int i = 0; i < nunits; ++i)
6519 mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
6520 mask_vec = build_vector (mask_type, mask_elts);
6525 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6526 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6529 vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
6531 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
6532 return vect_gen_perm_mask_any (vectype, sel);
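/* Typical use (an illustrative sketch; reversing a vector is just one
   example, the names below follow the usage pattern seen elsewhere in
   this file): to reverse a vector of NUNITS elements one would build

     auto_vec_perm_indices sel (nunits);
     for (i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     mask = vect_gen_perm_mask_checked (vectype, sel);

   and feed MASK to a VEC_PERM_EXPR with the same vector as both inputs.  */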
6535 /* Given vector variables X and Y that were generated for the scalar
6536 STMT, generate instructions to permute the vector elements of X and Y
6537 using permutation mask MASK_VEC, insert them at *GSI and return the
6538 permuted vector variable. */
6541 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6542 gimple_stmt_iterator *gsi)
6544 tree vectype = TREE_TYPE (x);
6545 tree perm_dest, data_ref;
6548 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6549 data_ref = make_ssa_name (perm_dest);
6551 /* Generate the permute statement. */
6552 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6553 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
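/* The result is a single statement of the form (a sketch; DEST stands
   for the SSA name this helper returns):
     DEST = VEC_PERM_EXPR <X, Y, MASK_VEC>;
   inserted at *GSI via vect_finish_stmt_generation.  */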
6558 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6559 inserting them on the loop's preheader edge. Returns true if we
6560 were successful in doing so (and thus STMT can then be moved),
6561 otherwise returns false. */
6564 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6570 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6572 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6573 if (!gimple_nop_p (def_stmt)
6574 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6576 /* Make sure we don't need to recurse. While we could do
6577 so in simple cases, when there are more complex use webs
6578 we don't have an easy way to preserve stmt order to fulfil
6579 dependencies within them. */
6582 if (gimple_code (def_stmt) == GIMPLE_PHI)
6584 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6586 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6587 if (!gimple_nop_p (def_stmt2)
6588 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6598 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6600 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6601 if (!gimple_nop_p (def_stmt)
6602 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6604 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6605 gsi_remove (&gsi, false);
6606 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
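/* Illustrative example (hypothetical GIMPLE, not taken from a testcase):
   if STMT is the loop-invariant load
     _2 = *p_1;
   and p_1 is defined inside LOOP by
     p_1 = &a + 16;
   whose own operands are all defined outside the loop, then p_1's
   definition is moved onto the preheader edge and the function returns
   true, after which the caller is free to hoist the load itself.  */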
6613 /* vectorizable_load.
6615 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
6617 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6618 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6619 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6622 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6623 slp_tree slp_node
, slp_instance slp_node_instance
)
6626 tree vec_dest
= NULL
;
6627 tree data_ref
= NULL
;
6628 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6629 stmt_vec_info prev_stmt_info
;
6630 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6631 struct loop
*loop
= NULL
;
6632 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6633 bool nested_in_vect_loop
= false;
6634 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6638 gimple
*new_stmt
= NULL
;
6640 enum dr_alignment_support alignment_support_scheme
;
6641 tree dataref_ptr
= NULL_TREE
;
6642 tree dataref_offset
= NULL_TREE
;
6643 gimple
*ptr_incr
= NULL
;
6645 int i
, j
, group_size
, group_gap_adj
;
6646 tree msq
= NULL_TREE
, lsq
;
6647 tree offset
= NULL_TREE
;
6648 tree byte_offset
= NULL_TREE
;
6649 tree realignment_token
= NULL_TREE
;
6651 vec
<tree
> dr_chain
= vNULL
;
6652 bool grouped_load
= false;
6654 gimple
*first_stmt_for_drptr
= NULL
;
6656 bool compute_in_loop
= false;
6657 struct loop
*at_loop
;
6659 bool slp
= (slp_node
!= NULL
);
6660 bool slp_perm
= false;
6661 enum tree_code code
;
6662 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6665 gather_scatter_info gs_info
;
6666 vec_info
*vinfo
= stmt_info
->vinfo
;
6669 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6672 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6676 /* Is vectorizable load? */
6677 if (!is_gimple_assign (stmt
))
6680 scalar_dest
= gimple_assign_lhs (stmt
);
6681 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6684 code
= gimple_assign_rhs_code (stmt
);
6685 if (code
!= ARRAY_REF
6686 && code
!= BIT_FIELD_REF
6687 && code
!= INDIRECT_REF
6688 && code
!= COMPONENT_REF
6689 && code
!= IMAGPART_EXPR
6690 && code
!= REALPART_EXPR
6692 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6695 if (!STMT_VINFO_DATA_REF (stmt_info
))
6698 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6699 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6703 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6704 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6705 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6710 /* Multiple types in SLP are handled by creating the appropriate number of
6711 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6716 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6718 gcc_assert (ncopies
>= 1);
6720 /* FORNOW. This restriction should be relaxed. */
6721 if (nested_in_vect_loop
&& ncopies
> 1)
6723 if (dump_enabled_p ())
6724 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6725 "multiple types in nested loop.\n");
6729 /* Invalidate assumptions made by dependence analysis when vectorization
6730 on the unrolled body effectively re-orders stmts. */
6732 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6733 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6734 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6736 if (dump_enabled_p ())
6737 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6738 "cannot perform implicit CSE when unrolling "
6739 "with negative dependence distance\n");
6743 elem_type
= TREE_TYPE (vectype
);
6744 mode
= TYPE_MODE (vectype
);
6746 /* FORNOW. In some cases can vectorize even if data-type not supported
6747 (e.g. - data copies). */
6748 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6750 if (dump_enabled_p ())
6751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6752 "Aligned load, but unsupported type.\n");
6756 /* Check if the load is a part of an interleaving chain. */
6757 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6759 grouped_load
= true;
6761 gcc_assert (!nested_in_vect_loop
);
6762 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6764 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6765 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6767 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6770 /* Invalidate assumptions made by dependence analysis when vectorization
6771 on the unrolled body effectively re-orders stmts. */
6772 if (!PURE_SLP_STMT (stmt_info
)
6773 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6774 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6775 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6777 if (dump_enabled_p ())
6778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6779 "cannot perform implicit CSE when performing "
6780 "group loads with negative dependence distance\n");
6784 /* Similarly when the stmt is a load that is both part of a SLP
6785 instance and a loop vectorized stmt via the same-dr mechanism
6786 we have to give up. */
6787 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6788 && (STMT_SLP_TYPE (stmt_info
)
6789 != STMT_SLP_TYPE (vinfo_for_stmt
6790 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6792 if (dump_enabled_p ())
6793 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6794 "conflicting SLP types for CSEd load\n");
6799 vect_memory_access_type memory_access_type
;
6800 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6801 &memory_access_type
, &gs_info
))
6804 if (!vec_stmt
) /* transformation not required. */
6807 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6808 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6809 /* The SLP costs are calculated during SLP analysis. */
6810 if (!PURE_SLP_STMT (stmt_info
))
6811 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6817 gcc_assert (memory_access_type
6818 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6820 if (dump_enabled_p ())
6821 dump_printf_loc (MSG_NOTE
, vect_location
,
6822 "transform load. ncopies = %d\n", ncopies
);
6826 ensure_base_align (dr
);
6828 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6830 tree vec_oprnd0
= NULL_TREE
, op
;
6831 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6832 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6833 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6834 edge pe
= loop_preheader_edge (loop
);
6837 enum { NARROW
, NONE
, WIDEN
} modifier
;
6838 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6840 if (nunits
== gather_off_nunits
)
6842 else if (nunits
== gather_off_nunits
/ 2)
6846 auto_vec_perm_indices
sel (gather_off_nunits
);
6847 for (i
= 0; i
< gather_off_nunits
; ++i
)
6848 sel
.quick_push (i
| nunits
);
6850 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6852 else if (nunits
== gather_off_nunits
* 2)
6856 auto_vec_perm_indices
sel (nunits
);
6857 for (i
= 0; i
< nunits
; ++i
)
6858 sel
.quick_push (i
< gather_off_nunits
6859 ? i
: i
+ nunits
- gather_off_nunits
);
6861 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6867 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6868 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6869 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6870 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6871 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6872 scaletype
= TREE_VALUE (arglist
);
6873 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6875 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6877 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6878 if (!is_gimple_min_invariant (ptr
))
6880 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6881 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6882 gcc_assert (!new_bb
);
6885 /* Currently we support only unconditional gather loads,
6886 so mask should be all ones. */
6887 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6888 mask
= build_int_cst (masktype
, -1);
6889 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6891 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6892 mask
= build_vector_from_val (masktype
, mask
);
6893 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6895 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6899 for (j
= 0; j
< 6; ++j
)
6901 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6902 mask
= build_real (TREE_TYPE (masktype
), r
);
6903 mask
= build_vector_from_val (masktype
, mask
);
6904 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6909 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6911 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6912 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6913 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6917 for (j
= 0; j
< 6; ++j
)
6919 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6920 merge
= build_real (TREE_TYPE (rettype
), r
);
6924 merge
= build_vector_from_val (rettype
, merge
);
6925 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6927 prev_stmt_info
= NULL
;
6928 for (j
= 0; j
< ncopies
; ++j
)
6930 if (modifier
== WIDEN
&& (j
& 1))
6931 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6932 perm_mask
, stmt
, gsi
);
6935 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6938 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6940 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6942 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6943 == TYPE_VECTOR_SUBPARTS (idxtype
));
6944 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6945 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6947 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6948 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6953 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6955 if (!useless_type_conversion_p (vectype
, rettype
))
6957 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6958 == TYPE_VECTOR_SUBPARTS (rettype
));
6959 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6960 gimple_call_set_lhs (new_stmt
, op
);
6961 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6962 var
= make_ssa_name (vec_dest
);
6963 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6965 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6969 var
= make_ssa_name (vec_dest
, new_stmt
);
6970 gimple_call_set_lhs (new_stmt
, var
);
6973 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6975 if (modifier
== NARROW
)
6982 var
= permute_vec_elements (prev_res
, var
,
6983 perm_mask
, stmt
, gsi
);
6984 new_stmt
= SSA_NAME_DEF_STMT (var
);
6987 if (prev_stmt_info
== NULL
)
6988 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6990 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6991 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6996 if (memory_access_type
== VMAT_ELEMENTWISE
6997 || memory_access_type
== VMAT_STRIDED_SLP
)
6999 gimple_stmt_iterator incr_gsi
;
7005 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7006 gimple_seq stmts
= NULL
;
7007 tree stride_base
, stride_step
, alias_off
;
7009 gcc_assert (!nested_in_vect_loop
);
7011 if (slp
&& grouped_load
)
7013 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7014 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7015 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7016 ref_type
= get_group_alias_ptr_type (first_stmt
);
7023 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7027 = fold_build_pointer_plus
7028 (DR_BASE_ADDRESS (first_dr
),
7029 size_binop (PLUS_EXPR
,
7030 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7031 convert_to_ptrofftype (DR_INIT (first_dr
))));
7032 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7034 /* For a load with loop-invariant (but other than power-of-2)
7035 stride (i.e. not a grouped access) like so:
7037 for (i = 0; i < n; i += stride)
7040 we generate a new induction variable and new accesses to
7041 form a new vector (or vectors, depending on ncopies):
7043 for (j = 0; ; j += VF*stride)
7045 tmp2 = array[j + stride];
7047 vectemp = {tmp1, tmp2, ...}
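/* Spelling this out for VF == 4 (an illustrative sketch; the temporaries
   are hypothetical):

     for (j = 0; ; j += 4*stride)
       tmp1 = array[j];
       tmp2 = array[j + stride];
       tmp3 = array[j + 2*stride];
       tmp4 = array[j + 3*stride];
       vectemp = {tmp1, tmp2, tmp3, tmp4};

   i.e. one scalar (or small-vector) load per element, combined into a
   vector with a CONSTRUCTOR.  */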
7050 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7051 build_int_cst (TREE_TYPE (stride_step
), vf
));
7053 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7055 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7056 loop
, &incr_gsi
, insert_after
,
7058 incr
= gsi_stmt (incr_gsi
);
7059 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7061 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7062 &stmts
, true, NULL_TREE
);
7064 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7066 prev_stmt_info
= NULL
;
7067 running_off
= offvar
;
7068 alias_off
= build_int_cst (ref_type
, 0);
7069 int nloads
= nunits
;
7071 tree ltype
= TREE_TYPE (vectype
);
7072 tree lvectype
= vectype
;
7073 auto_vec
<tree
> dr_chain
;
7074 if (memory_access_type
== VMAT_STRIDED_SLP
)
7076 if (group_size
< nunits
)
7078 /* First check if vec_init optab supports construction from
7079 vector elts directly. */
7080 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7082 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7083 && VECTOR_MODE_P (vmode
)
7084 && (convert_optab_handler (vec_init_optab
,
7085 TYPE_MODE (vectype
), vmode
)
7086 != CODE_FOR_nothing
))
7088 nloads
= nunits
/ group_size
;
7090 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7094 /* Otherwise avoid emitting a constructor of vector elements
7095 by performing the loads using an integer type of the same
7096 size, constructing a vector of those and then
7097 re-interpreting it as the original vector type.
7098 This avoids a huge runtime penalty due to the general
7099 inability to perform store forwarding from smaller stores
7100 to a larger load. */
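/* Illustrative sketch (an assumed example): for a V16QI load with
   group_size == 4, lsize is 4 * 8 = 32 bits, so ltype becomes a 32-bit
   integer type and lvectype a vector of four such lanes.  Each group is
   then loaded with a single 32-bit load, the four lanes are combined
   with a CONSTRUCTOR, and the result is VIEW_CONVERTed back to V16QI,
   avoiding sixteen QImode element loads that would defeat store
   forwarding from the earlier stores.  */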
7102 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7103 elmode
= int_mode_for_size (lsize
, 0).require ();
7104 /* If we can't construct such a vector fall back to
7105 element loads of the original vector type. */
7106 if (mode_for_vector (elmode
,
7107 nunits
/ group_size
).exists (&vmode
)
7108 && VECTOR_MODE_P (vmode
)
7109 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7110 != CODE_FOR_nothing
))
7112 nloads
= nunits
/ group_size
;
7114 ltype
= build_nonstandard_integer_type (lsize
, 1);
7115 lvectype
= build_vector_type (ltype
, nloads
);
7125 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7129 /* For SLP permutation support we need to load the whole group,
7130 not only the number of vector stmts the permutation result
7134 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7135 dr_chain
.create (ncopies
);
7138 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7141 unsigned HOST_WIDE_INT
7142 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7143 for (j
= 0; j
< ncopies
; j
++)
7146 vec_alloc (v
, nloads
);
7147 for (i
= 0; i
< nloads
; i
++)
7149 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7151 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7152 build2 (MEM_REF
, ltype
,
7153 running_off
, this_off
));
7154 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7156 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7157 gimple_assign_lhs (new_stmt
));
7161 || group_el
== group_size
)
7163 tree newoff
= copy_ssa_name (running_off
);
7164 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7165 running_off
, stride_step
);
7166 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7168 running_off
= newoff
;
7174 tree vec_inv
= build_constructor (lvectype
, v
);
7175 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7176 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7177 if (lvectype
!= vectype
)
7179 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7181 build1 (VIEW_CONVERT_EXPR
,
7182 vectype
, new_temp
));
7183 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7190 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7192 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7197 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7199 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7200 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7206 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7207 slp_node_instance
, false, &n_perms
);
7214 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7215 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7216 /* For SLP vectorization we directly vectorize a subchain
7217 without permutation. */
7218 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7219 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7220 /* For BB vectorization always use the first stmt to base
7221 the data ref pointer on. */
7223 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7225 /* Check if the chain of loads is already vectorized. */
7226 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7227 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7228 ??? But we can only do so if there is exactly one
7229 as we have no way to get at the rest. Leave the CSE
7231 ??? With the group load eventually participating
7232 in multiple different permutations (having multiple
7233 slp nodes which refer to the same group) the CSE
7234 is even wrong code. See PR56270. */
7237 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7240 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7243 /* VEC_NUM is the number of vect stmts to be created for this group. */
7246 grouped_load
= false;
7247 /* For SLP permutation support we need to load the whole group,
7248 not only the number of vector stmts the permutation result
7252 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7253 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7257 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7259 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7263 vec_num
= group_size
;
7265 ref_type
= get_group_alias_ptr_type (first_stmt
);
7271 group_size
= vec_num
= 1;
7273 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7276 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7277 gcc_assert (alignment_support_scheme
);
7278 /* Targets with load-lane instructions must not require explicit
7280 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7281 || alignment_support_scheme
== dr_aligned
7282 || alignment_support_scheme
== dr_unaligned_supported
);
7284 /* In case the vectorization factor (VF) is bigger than the number
7285 of elements that we can fit in a vectype (nunits), we have to generate
7286 more than one vector stmt - i.e - we need to "unroll" the
7287 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7288 from one copy of the vector stmt to the next, in the field
7289 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7290 stages to find the correct vector defs to be used when vectorizing
7291 stmts that use the defs of the current stmt. The example below
7292 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7293 need to create 4 vectorized stmts):
7295 before vectorization:
7296 RELATED_STMT VEC_STMT
7300 step 1: vectorize stmt S1:
7301 We first create the vector stmt VS1_0, and, as usual, record a
7302 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7303 Next, we create the vector stmt VS1_1, and record a pointer to
7304 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7305 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7307 RELATED_STMT VEC_STMT
7308 VS1_0: vx0 = memref0 VS1_1 -
7309 VS1_1: vx1 = memref1 VS1_2 -
7310 VS1_2: vx2 = memref2 VS1_3 -
7311 VS1_3: vx3 = memref3 - -
7312 S1: x = load - VS1_0
7315 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7316 information we recorded in RELATED_STMT field is used to vectorize
7319 /* In case of interleaving (non-unit grouped access):
7326 Vectorized loads are created in the order of memory accesses
7327 starting from the access of the first stmt of the chain:
7330 VS2: vx1 = &base + vec_size*1
7331 VS3: vx3 = &base + vec_size*2
7332 VS4: vx4 = &base + vec_size*3
7334 Then permutation statements are generated:
7336 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7337 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7340 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7341 (the order of the data-refs in the output of vect_permute_load_chain
7342 corresponds to the order of scalar stmts in the interleaving chain - see
7343 the documentation of vect_permute_load_chain()).
7344 The generation of permutation stmts and recording them in
7345 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7347 In case of both multiple types and interleaving, the vector loads and
7348 permutation stmts above are created for every copy. The result vector
7349 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7350 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
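/* Making the extract masks above concrete (a worked example with
   hypothetical element values): for group_size == 2 and 8-element
   vectors, if vx0 = {a0,b0,a1,b1,a2,b2,a3,b3} and
   vx1 = {a4,b4,a5,b5,a6,b6,a7,b7}, then
     vx5 = VEC_PERM_EXPR <vx0, vx1, {0,2,4,6,8,10,12,14}> = {a0,...,a7}
     vx6 = VEC_PERM_EXPR <vx0, vx1, {1,3,5,7,9,11,13,15}> = {b0,...,b7}
   i.e. the even elements form the vector for the first scalar load of
   the group and the odd elements the vector for the second.  */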
7352 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7353 on a target that supports unaligned accesses (dr_unaligned_supported)
7354 we generate the following code:
7358 p = p + indx * vectype_size;
7363 Otherwise, the data reference is potentially unaligned on a target that
7364 does not support unaligned accesses (dr_explicit_realign_optimized) -
7365 then generate the following code, in which the data in each iteration is
7366 obtained by two vector loads, one from the previous iteration, and one
7367 from the current iteration:
7369 msq_init = *(floor(p1))
7370 p2 = initial_addr + VS - 1;
7371 realignment_token = call target_builtin;
7374 p2 = p2 + indx * vectype_size
7376 vec_dest = realign_load (msq, lsq, realignment_token)
7381 /* If the misalignment remains the same throughout the execution of the
7382 loop, we can create the init_addr and permutation mask at the loop
7383 preheader. Otherwise, it needs to be created inside the loop.
7384 This can only occur when vectorizing memory accesses in the inner-loop
7385 nested within an outer-loop that is being vectorized. */
7387 if (nested_in_vect_loop
7388 && (DR_STEP_ALIGNMENT (dr
) % GET_MODE_SIZE (TYPE_MODE (vectype
))) != 0)
7390 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7391 compute_in_loop
= true;
7394 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7395 || alignment_support_scheme
== dr_explicit_realign
)
7396 && !compute_in_loop
)
7398 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7399 alignment_support_scheme
, NULL_TREE
,
7401 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7403 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7404 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7411 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7412 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7414 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7415 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7417 aggr_type
= vectype
;
7419 prev_stmt_info
= NULL
;
7421 for (j
= 0; j
< ncopies
; j
++)
7423 /* 1. Create the vector or array pointer update chain. */
7426 bool simd_lane_access_p
7427 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7428 if (simd_lane_access_p
7429 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7430 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7431 && integer_zerop (DR_OFFSET (first_dr
))
7432 && integer_zerop (DR_INIT (first_dr
))
7433 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7434 get_alias_set (TREE_TYPE (ref_type
)))
7435 && (alignment_support_scheme
== dr_aligned
7436 || alignment_support_scheme
== dr_unaligned_supported
))
7438 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7439 dataref_offset
= build_int_cst (ref_type
, 0);
7442 else if (first_stmt_for_drptr
7443 && first_stmt
!= first_stmt_for_drptr
)
7446 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7447 at_loop
, offset
, &dummy
, gsi
,
7448 &ptr_incr
, simd_lane_access_p
,
7449 &inv_p
, byte_offset
);
7450 /* Adjust the pointer by the difference to first_stmt. */
7451 data_reference_p ptrdr
7452 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7453 tree diff
= fold_convert (sizetype
,
7454 size_binop (MINUS_EXPR
,
7457 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7462 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7463 offset
, &dummy
, gsi
, &ptr_incr
,
7464 simd_lane_access_p
, &inv_p
,
7467 else if (dataref_offset
)
7468 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7469 TYPE_SIZE_UNIT (aggr_type
));
7471 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7472 TYPE_SIZE_UNIT (aggr_type
));
7474 if (grouped_load
|| slp_perm
)
7475 dr_chain
.create (vec_num
);
7477 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7481 vec_array
= create_vector_array (vectype
, vec_num
);
7484 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7485 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7486 gcall
*call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1,
7488 gimple_call_set_lhs (call
, vec_array
);
7489 gimple_call_set_nothrow (call
, true);
7491 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7493 /* Extract each vector into an SSA_NAME. */
7494 for (i
= 0; i
< vec_num
; i
++)
7496 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7498 dr_chain
.quick_push (new_temp
);
7501 /* Record the mapping between SSA_NAMEs and statements. */
7502 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7506 for (i
= 0; i
< vec_num
; i
++)
7509 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7512 /* 2. Create the vector-load in the loop. */
7513 switch (alignment_support_scheme
)
7516 case dr_unaligned_supported
:
7518 unsigned int align
, misalign
;
7521 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7524 : build_int_cst (ref_type
, 0));
7525 align
= DR_TARGET_ALIGNMENT (dr
);
7526 if (alignment_support_scheme
== dr_aligned
)
7528 gcc_assert (aligned_access_p (first_dr
));
7531 else if (DR_MISALIGNMENT (first_dr
) == -1)
7533 align
= dr_alignment (vect_dr_behavior (first_dr
));
7535 TREE_TYPE (data_ref
)
7536 = build_aligned_type (TREE_TYPE (data_ref
),
7537 align
* BITS_PER_UNIT
);
7541 TREE_TYPE (data_ref
)
7542 = build_aligned_type (TREE_TYPE (data_ref
),
7543 TYPE_ALIGN (elem_type
));
7544 misalign
= DR_MISALIGNMENT (first_dr
);
7546 if (dataref_offset
== NULL_TREE
7547 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7548 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7552 case dr_explicit_realign
:
7556 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7558 if (compute_in_loop
)
7559 msq
= vect_setup_realignment (first_stmt
, gsi
,
7561 dr_explicit_realign
,
7564 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7565 ptr
= copy_ssa_name (dataref_ptr
);
7567 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7568 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7569 new_stmt
= gimple_build_assign
7570 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7572 (TREE_TYPE (dataref_ptr
),
7573 -(HOST_WIDE_INT
) align
));
7574 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7576 = build2 (MEM_REF
, vectype
, ptr
,
7577 build_int_cst (ref_type
, 0));
7578 vec_dest
= vect_create_destination_var (scalar_dest
,
7580 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7581 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7582 gimple_assign_set_lhs (new_stmt
, new_temp
);
7583 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7584 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7585 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7588 bump
= size_binop (MULT_EXPR
, vs
,
7589 TYPE_SIZE_UNIT (elem_type
));
7590 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7591 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7592 new_stmt
= gimple_build_assign
7593 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7595 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7596 ptr
= copy_ssa_name (ptr
, new_stmt
);
7597 gimple_assign_set_lhs (new_stmt
, ptr
);
7598 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7600 = build2 (MEM_REF
, vectype
, ptr
,
7601 build_int_cst (ref_type
, 0));
7604 case dr_explicit_realign_optimized
:
7606 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7607 new_temp
= copy_ssa_name (dataref_ptr
);
7609 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7610 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7611 new_stmt
= gimple_build_assign
7612 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7613 build_int_cst (TREE_TYPE (dataref_ptr
),
7614 -(HOST_WIDE_INT
) align
));
7615 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7617 = build2 (MEM_REF
, vectype
, new_temp
,
7618 build_int_cst (ref_type
, 0));
7624 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7625 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7626 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7627 gimple_assign_set_lhs (new_stmt
, new_temp
);
7628 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7630 /* 3. Handle explicit realignment if necessary/supported.
7632 vec_dest = realign_load (msq, lsq, realignment_token) */
7633 if (alignment_support_scheme
== dr_explicit_realign_optimized
7634 || alignment_support_scheme
== dr_explicit_realign
)
7636 lsq
= gimple_assign_lhs (new_stmt
);
7637 if (!realignment_token
)
7638 realignment_token
= dataref_ptr
;
7639 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7640 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7641 msq
, lsq
, realignment_token
);
7642 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7643 gimple_assign_set_lhs (new_stmt
, new_temp
);
7644 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7646 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7649 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7650 add_phi_arg (phi
, lsq
,
7651 loop_latch_edge (containing_loop
),
7657 /* 4. Handle invariant-load. */
7658 if (inv_p
&& !bb_vinfo
)
7660 gcc_assert (!grouped_load
);
7661 /* If we have versioned for aliasing or the loop doesn't
7662 have any data dependencies that would preclude this,
7663 then we are sure this is a loop invariant load and
7664 thus we can insert it on the preheader edge. */
7665 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7666 && !nested_in_vect_loop
7667 && hoist_defs_of_uses (stmt
, loop
))
7669 if (dump_enabled_p ())
7671 dump_printf_loc (MSG_NOTE
, vect_location
,
7672 "hoisting out of the vectorized "
7674 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7676 tree tem
= copy_ssa_name (scalar_dest
);
7677 gsi_insert_on_edge_immediate
7678 (loop_preheader_edge (loop
),
7679 gimple_build_assign (tem
,
7681 (gimple_assign_rhs1 (stmt
))));
7682 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7683 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7684 set_vinfo_for_stmt (new_stmt
,
7685 new_stmt_vec_info (new_stmt
, vinfo
));
7689 gimple_stmt_iterator gsi2
= *gsi
;
7691 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7693 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7697 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7699 tree perm_mask
= perm_mask_for_reverse (vectype
);
7700 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7701 perm_mask
, stmt
, gsi
);
7702 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7705 /* Collect vector loads and later create their permutation in
7706 vect_transform_grouped_load (). */
7707 if (grouped_load
|| slp_perm
)
7708 dr_chain
.quick_push (new_temp
);
7710 /* Store vector loads in the corresponding SLP_NODE. */
7711 if (slp
&& !slp_perm
)
7712 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7714 /* With SLP permutation we load the gaps as well; without it
7715 we need to skip the gaps after we manage to fully load
7716 all elements. group_gap_adj is GROUP_SIZE here. */
7717 group_elt
+= nunits
;
7718 if (group_gap_adj
!= 0 && ! slp_perm
7719 && group_elt
== group_size
- group_gap_adj
)
7721 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7723 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7724 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7729 /* Bump the vector pointer to account for a gap or for excess
7730 elements loaded for a permuted SLP load. */
7731 if (group_gap_adj
!= 0 && slp_perm
)
7733 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7735 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7736 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7741 if (slp
&& !slp_perm
)
7747 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7748 slp_node_instance
, false,
7751 dr_chain
.release ();
7759 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7760 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7761 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7766 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7768 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7769 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7772 dr_chain
.release ();
7778 /* Function vect_is_simple_cond.
7781 LOOP - the loop that is being vectorized.
7782 COND - Condition that is checked for simple use.
7785 *COMP_VECTYPE - the vector type for the comparison.
7786 *DTS - The def types for the arguments of the comparison
7788 Returns whether a COND can be vectorized. Checks whether
7789 condition operands are supportable using vect_is_simple_use. */
7792 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
7793 tree
*comp_vectype
, enum vect_def_type
*dts
)
7796 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7799 if (TREE_CODE (cond
) == SSA_NAME
7800 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7802 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7803 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7804 &dts
[0], comp_vectype
)
7806 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7811 if (!COMPARISON_CLASS_P (cond
))
7814 lhs
= TREE_OPERAND (cond
, 0);
7815 rhs
= TREE_OPERAND (cond
, 1);
7817 if (TREE_CODE (lhs
) == SSA_NAME
)
7819 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7820 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
7823 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
7824 || TREE_CODE (lhs
) == FIXED_CST
)
7825 dts
[0] = vect_constant_def
;
7829 if (TREE_CODE (rhs
) == SSA_NAME
)
7831 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7832 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
7835 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
7836 || TREE_CODE (rhs
) == FIXED_CST
)
7837 dts
[1] = vect_constant_def
;
7841 if (vectype1
&& vectype2
7842 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7845 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7849 /* vectorizable_condition.
7851 Check if STMT is a conditional modify expression that can be vectorized.
7852 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7853 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7856 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7857 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7858 the else clause if it is 2).
7860 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7863 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7864 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7867 tree scalar_dest
= NULL_TREE
;
7868 tree vec_dest
= NULL_TREE
;
7869 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7870 tree then_clause
, else_clause
;
7871 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7872 tree comp_vectype
= NULL_TREE
;
7873 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7874 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7877 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7878 enum vect_def_type dts
[4]
7879 = {vect_unknown_def_type
, vect_unknown_def_type
,
7880 vect_unknown_def_type
, vect_unknown_def_type
};
7883 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7884 stmt_vec_info prev_stmt_info
= NULL
;
7886 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7887 vec
<tree
> vec_oprnds0
= vNULL
;
7888 vec
<tree
> vec_oprnds1
= vNULL
;
7889 vec
<tree
> vec_oprnds2
= vNULL
;
7890 vec
<tree
> vec_oprnds3
= vNULL
;
7892 bool masked
= false;
7894 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7897 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7899 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7902 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7903 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7907 /* FORNOW: not yet supported. */
7908 if (STMT_VINFO_LIVE_P (stmt_info
))
7910 if (dump_enabled_p ())
7911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7912 "value used after loop.\n");
7917 /* Is vectorizable conditional operation? */
7918 if (!is_gimple_assign (stmt
))
7921 code
= gimple_assign_rhs_code (stmt
);
7923 if (code
!= COND_EXPR
)
7926 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7927 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7932 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7934 gcc_assert (ncopies
>= 1);
7935 if (reduc_index
&& ncopies
> 1)
7936 return false; /* FORNOW */
7938 cond_expr
= gimple_assign_rhs1 (stmt
);
7939 then_clause
= gimple_assign_rhs2 (stmt
);
7940 else_clause
= gimple_assign_rhs3 (stmt
);
7942 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
7943 &comp_vectype
, &dts
[0])
7948 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
7951 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
7955 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7958 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7961 masked
= !COMPARISON_CLASS_P (cond_expr
);
7962 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7964 if (vec_cmp_type
== NULL_TREE
)
7967 cond_code
= TREE_CODE (cond_expr
);
7970 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
7971 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
7974 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7976 /* Boolean values may have another representation in vectors
7977 and therefore we prefer bit operations over comparison for
7978 them (which also works for scalar masks). We store opcodes
7979 to use in bitop1 and bitop2. Statement is vectorized as
7980 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7981 depending on bitop1 and bitop2 arity. */
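/* For example (plain boolean algebra on 0/1 mask elements, given as an
   illustration rather than a description of a particular case below):
   a comparison a > b on such masks is equivalent to a & ~b, which fits
   the "rhs1 BITOP2 (BITOP1 rhs2)" shape with bitop1 = BIT_NOT_EXPR
   applied to rhs2 and bitop2 = BIT_AND_EXPR combining the result with
   rhs1.  */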
7985 bitop1
= BIT_NOT_EXPR
;
7986 bitop2
= BIT_AND_EXPR
;
7989 bitop1
= BIT_NOT_EXPR
;
7990 bitop2
= BIT_IOR_EXPR
;
7993 bitop1
= BIT_NOT_EXPR
;
7994 bitop2
= BIT_AND_EXPR
;
7995 std::swap (cond_expr0
, cond_expr1
);
7998 bitop1
= BIT_NOT_EXPR
;
7999 bitop2
= BIT_IOR_EXPR
;
8000 std::swap (cond_expr0
, cond_expr1
);
8003 bitop1
= BIT_XOR_EXPR
;
8006 bitop1
= BIT_XOR_EXPR
;
8007 bitop2
= BIT_NOT_EXPR
;
8012 cond_code
= SSA_NAME
;
8017 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8018 if (bitop1
!= NOP_EXPR
)
8020 machine_mode mode
	= TYPE_MODE (comp_vectype);
      optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	return false;

      if (bitop2 != NOP_EXPR)
	{
	  optab = optab_for_tree_code (bitop2, comp_vectype,
				       optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;
	}

      if (expand_vec_cond_expr_p (vectype, comp_vectype,
				  cond_code))
	{
	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
	  return true;
	}
      return false;
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      if (masked)
		ops.safe_push (cond_expr);
	      else
		{
		  ops.safe_push (cond_expr0);
		  ops.safe_push (cond_expr1);
		}
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      if (!masked)
		vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      gimple *gtemp;
	      if (masked)
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr, stmt,
						    comp_vectype);
		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
				      &gtemp, &dts[0]);
		}
	      else
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr0,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);

		  vec_cond_rhs
		    = vect_get_vec_def_for_operand (cond_expr1,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
		}
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt);
		  vect_is_simple_use (then_clause, loop_vinfo,
				      &gtemp, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt);
		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs
	    = vect_get_vec_def_for_stmt_copy (dts[0],
					      vec_oprnds0.pop ());
	  if (!masked)
	    vec_cond_rhs
	      = vect_get_vec_def_for_stmt_copy (dts[1],
						vec_oprnds1.pop ());

	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  if (!masked)
	    vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  if (masked)
	    vec_compare = vec_cond_lhs;
	  else
	    {
	      vec_cond_rhs = vec_oprnds1[i];
	      if (bitop1 == NOP_EXPR)
		vec_compare = build2 (cond_code, vec_cmp_type,
				      vec_cond_lhs, vec_cond_rhs);
	      else
		{
		  new_temp = make_ssa_name (vec_cmp_type);
		  if (bitop1 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (new_temp, bitop1,
						    vec_cond_rhs);
		  else
		    new_stmt
		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
					     vec_cond_rhs);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (bitop2 == NOP_EXPR)
		    vec_compare = new_temp;
		  else if (bitop2 == BIT_NOT_EXPR)
		    {
		      /* Instead of doing ~x ? y : z do x ? z : y.  */
		      vec_compare = new_temp;
		      std::swap (vec_then_clause, vec_else_clause);
		    }
		  else
		    {
		      vec_compare = make_ssa_name (vec_cmp_type);
		      new_stmt
			= gimple_build_assign (vec_compare, bitop2,
					       vec_cond_lhs, new_temp);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    }
		}
	    }
	  new_temp = make_ssa_name (vec_dest);
	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
					  vec_compare, vec_then_clause,
					  vec_else_clause);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
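
/* Editorial note: an illustrative sketch, not part of the vectorizer.  The
   bitop2 == BIT_NOT_EXPR case above avoids emitting the negated mask by
   swapping the THEN and ELSE values instead: ~x ? y : z selects exactly what
   x ? z : y selects.  The helper below (a hypothetical name) checks this
   equivalence for scalar 0/1 masks.  */

static inline int
cond_swap_sketch (int mask, int then_val, int else_val)
{
  int with_not = (mask ? 0 : 1) ? then_val : else_val;	/* ~x ? y : z  */
  int with_swap = mask ? else_val : then_val;		/*  x ? z : y  */
  return with_not == with_swap;				/* Always 1.  */
}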
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, tree reduc_def,
			 slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree mask_type, mask, new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  unsigned nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
			   &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
			   &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
	return false;
    }
  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, NULL, NULL);
      if (bitop1 == NOP_EXPR)
	return expand_vec_cmp_expr_p (vectype, mask_type, code);
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	  return true;
	}
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 2> ops;
	      auto_vec<vec<tree>, 2> vec_defs;

	      ops.safe_push (rhs1);
	      ops.safe_push (rhs2);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
	    }
	}
      else
	{
	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
						     vec_oprnds0.pop ());
	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
						     vec_oprnds1.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_rhs1);
	  vec_oprnds1.quick_push (vec_rhs2);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
	{
	  vec_rhs2 = vec_oprnds1[i];

	  new_temp = make_ssa_name (mask);
	  if (bitop1 == NOP_EXPR)
	    {
	      new_stmt = gimple_build_assign (new_temp, code,
					      vec_rhs1, vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  else
	    {
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	      else
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
						vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (bitop2 != NOP_EXPR)
		{
		  tree res = make_ssa_name (mask);
		  if (bitop2 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
		  else
		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						    new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
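
/* Editorial note: an illustrative sketch, not part of the vectorizer.  For
   boolean vectors the function above replaces the comparison by the bit
   operations it records in bitop1/bitop2; restricted to scalar 0/1 masks the
   chosen lowerings read:
       a >  b   <=>   a & ~b
       a >= b   <=>   a | ~b
       a == b   <=>   ~(a ^ b)
       a != b   <=>   a ^ b
   with LT/LE handled by swapping the operands and reusing GT/GE.  The helper
   below (a hypothetical name) spells out the GT_EXPR case.  */

static inline unsigned
mask_gt_sketch (unsigned a, unsigned b)
{
  /* GT_EXPR: bitop1 = BIT_NOT_EXPR applied to b, bitop2 = BIT_AND_EXPR.  */
  return a & (~b & 1U);
}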
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

static bool
can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, gimple **vec_stmt)
{
  if (slp_node)
    {
      gimple *slp_stmt;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
	{
	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
					       vec_stmt))
	    return false;
	}
    }
  else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
    return false;

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
		   slp_instance node_instance)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");
      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts will
     already be part of the SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
			      node_instance))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node, node_instance))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
	  || vectorizable_induction (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}
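
/* Editorial note: an illustrative sketch, not part of the vectorizer.  The
   analysis above simply asks each vectorizable_* routine in turn with a NULL
   vec_stmt (so nothing is generated) and accepts the statement as soon as one
   handler recognizes it; the transform phase later re-dispatches on the
   STMT_VINFO_TYPE the successful handler recorded.  A minimal analogue of the
   short-circuit dispatch, with hypothetical handler types:  */

typedef bool (*analyze_handler_sketch) (int stmt_kind);

static bool
dispatch_sketch (int stmt_kind, analyze_handler_sketch *handlers, int n)
{
  for (int i = 0; i < n; i++)
    if (handlers[i] (stmt_kind))
      return true;	/* Analysis succeeded.  */
  return false;		/* "not vectorized: relevant stmt not supported".  */
}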
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
				     slp_node_instance);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
					   vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  int nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
    return NULL_TREE;

  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
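
/* Editorial note: an illustrative sketch, not part of the vectorizer.  The
   lane count computed above is just the vector size in bytes divided by the
   element size in bytes, e.g. a 16-byte SIMD register holds 4 lanes of a
   4-byte element.  The helper name below is hypothetical.  */

static inline unsigned
nunits_sketch (unsigned vector_size_bytes, unsigned elem_size_bytes)
{
  /* Mirrors GET_MODE_SIZE (simd_mode) / nbytes in the function above.  */
  return vector_size_bytes / elem_size_bytes;
}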
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;

  return true;
}
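
/* Editorial note: an illustrative sketch, not part of the vectorizer.  It
   restates the classification order used by vect_is_simple_use above for a
   hypothetical operand descriptor: constants first, then anything defined
   outside the region being vectorized, and only then the def type recorded
   on the defining statement.  */

enum sketch_def_type { SKETCH_CONSTANT, SKETCH_EXTERNAL, SKETCH_INTERNAL };

struct sketch_operand
{
  bool is_constant;	    /* CONSTANT_CLASS_P / is_gimple_min_invariant.  */
  bool defined_in_region;   /* vect_stmt_in_region_p of the def stmt.  */
};

static enum sketch_def_type
classify_operand_sketch (struct sketch_operand op)
{
  if (op.is_constant)
    return SKETCH_CONSTANT;	/* vect_constant_def.  */
  if (!op.defined_in_region)
    return SKETCH_EXTERNAL;	/* vect_external_def.  */
  return SKETCH_INTERNAL;	/* STMT_VINFO_DEF_TYPE of the def stmt.  */
}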
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have the
	     same operation.  One such an example is s += a * b, where elements
	     in a and b cannot be reordered.  Here we check if the vector defined
	     by STMT is only directly used in the reduction statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || (TYPE_VECTOR_SUBPARTS (vectype) / 2
		== TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type
	    = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
				       current_vector_size);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
		    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
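
/* Editorial note: an illustrative sketch, not part of the vectorizer.  A
   multi-step widening such as char -> short -> int chains two hi/lo unpack
   steps through the intermediate type recorded in INTERM_TYPES
   (MULTI_STEP_CVT == 1 for this example).  The sketch below shows the
   element-level effect on a tiny four-lane "vector" held in plain arrays;
   the function name is hypothetical.  */

static void
widen_char_to_int_sketch (const signed char in[4], int out[4])
{
  short mid[4];
  /* First step: the VEC_UNPACK_*_EXPR pair widens char to short.  */
  for (int i = 0; i < 4; i++)
    mid[i] = (short) in[i];
  /* Second step: another unpack pair widens the intermediate short to int.  */
  for (int i = 0; i < 4; i++)
    out[i] = (int) mid[i];
}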
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || (TYPE_VECTOR_SUBPARTS (vectype) * 2
		== TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type
	    = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
				       current_vector_size);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
		    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();