1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
62 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
64 return STMT_VINFO_VECTYPE (stmt_info
);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
70 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
72 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
73 basic_block bb
= gimple_bb (stmt
);
74 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
80 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
82 return (bb
->loop_father
== loop
->inner
);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
90 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
91 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
92 int misalign
, enum vect_cost_model_location where
)
94 if ((kind
== vector_load
|| kind
== unaligned_load
)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
96 kind
= vector_gather_load
;
97 if ((kind
== vector_store
|| kind
== unaligned_store
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_scatter_store
;
102 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
103 stmt_info_for_cost si
= { count
, kind
,
104 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
106 body_cost_vec
->safe_push (si
);
108 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
111 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
112 count
, kind
, stmt_info
, misalign
, where
);
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
118 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
120 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
130 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
131 tree array
, unsigned HOST_WIDE_INT n
)
133 tree vect_type
, vect
, vect_name
, array_ref
;
136 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
137 vect_type
= TREE_TYPE (TREE_TYPE (array
));
138 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
139 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
140 build_int_cst (size_type_node
, n
),
141 NULL_TREE
, NULL_TREE
);
143 new_stmt
= gimple_build_assign (vect
, array_ref
);
144 vect_name
= make_ssa_name (vect
, new_stmt
);
145 gimple_assign_set_lhs (new_stmt
, vect_name
);
146 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
156 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
157 tree array
, unsigned HOST_WIDE_INT n
)
162 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
163 build_int_cst (size_type_node
, n
),
164 NULL_TREE
, NULL_TREE
);
166 new_stmt
= gimple_build_assign (array_ref
, vect
);
167 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
175 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
179 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
187 /* Function vect_mark_relevant.
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
192 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
193 enum vect_relevant relevant
, bool live_p
)
195 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
196 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
197 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
198 gimple
*pattern_stmt
;
200 if (dump_enabled_p ())
202 dump_printf_loc (MSG_NOTE
, vect_location
,
203 "mark relevant %d, live %d: ", relevant
, live_p
);
204 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
209 may have their own uses that are not in any pattern, in such cases the
210 stmt itself should be marked. */
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
213 /* This is the last stmt in a sequence that was detected as a
214 pattern that can potentially be vectorized. Don't mark the stmt
215 as relevant/live because it's not going to be vectorized.
216 Instead mark the pattern-stmt that replaces it. */
218 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE
, vect_location
,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_info
= vinfo_for_stmt (pattern_stmt
);
225 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
226 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
227 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
231 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
232 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
233 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
235 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE
, vect_location
,
240 "already marked relevant/live.\n");
244 worklist
->safe_push (stmt
);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT is simple and all uses of it are invariant. */
253 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
259 if (!is_gimple_assign (stmt
))
262 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
264 enum vect_def_type dt
= vect_uninitialized_def
;
266 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
270 "use not simple.\n");
274 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - control stmts in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
293 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
294 enum vect_relevant
*relevant
, bool *live_p
)
296 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
298 imm_use_iterator imm_iter
;
302 *relevant
= vect_unused_in_scope
;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt
)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
308 != loop_exit_ctrl_vec_info_type
)
309 *relevant
= vect_used_in_scope
;
311 /* changing memory. */
312 if (gimple_code (stmt
) != GIMPLE_PHI
)
313 if (gimple_vdef (stmt
)
314 && !gimple_clobber_p (stmt
))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE
, vect_location
,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant
= vect_used_in_scope
;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
325 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
327 basic_block bb
= gimple_bb (USE_STMT (use_p
));
328 if (!flow_bb_inside_loop_p (loop
, bb
))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE
, vect_location
,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p
)))
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
340 gcc_assert (bb
== single_exit (loop
)->dest
);
347 if (*live_p
&& *relevant
== vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE
, vect_location
,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant
= vect_used_only_live
;
356 return (*live_p
|| *relevant
);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT. Check if USE is
363 used in STMT for anything other than indexing an array. */
366 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
369 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
371 /* USE corresponds to some operand in STMT. If there is no data
372 reference in STMT, then any operand that corresponds to USE
373 is not indexing an array. */
374 if (!STMT_VINFO_DATA_REF (stmt_info
))
377 /* STMT has a data_ref. FORNOW this means that its of one of
381 (This should have been verified in analyze_data_refs).
383 'var' in the second case corresponds to a def, not a use,
384 so USE cannot correspond to any operands that are not used
387 Therefore, all we need to check is if STMT falls into the
388 first case, and whether var corresponds to USE. */
390 if (!gimple_assign_copy_p (stmt
))
392 if (is_gimple_call (stmt
)
393 && gimple_call_internal_p (stmt
))
394 switch (gimple_call_internal_fn (stmt
))
397 operand
= gimple_call_arg (stmt
, 3);
402 operand
= gimple_call_arg (stmt
, 2);
412 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
414 operand
= gimple_assign_rhs1 (stmt
);
415 if (TREE_CODE (operand
) != SSA_NAME
)
426 Function process_use.
429 - a USE in STMT in a loop represented by LOOP_VINFO
430 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
431 that defined USE. This is done by calling mark_relevant and passing it
432 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
433 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
437 Generally, LIVE_P and RELEVANT are used to define the liveness and
438 relevance info of the DEF_STMT of this USE:
439 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
440 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
442 - case 1: If USE is used only for address computations (e.g. array indexing),
443 which does not need to be directly vectorized, then the liveness/relevance
444 of the respective DEF_STMT is left unchanged.
445 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
446 skip DEF_STMT cause it had already been processed.
447 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
448 be modified accordingly.
450 Return true if everything is as expected. Return false otherwise. */
453 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
454 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
457 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
458 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
459 stmt_vec_info dstmt_vinfo
;
460 basic_block bb
, def_bb
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
469 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
473 "not vectorized: unsupported use in stmt.\n");
477 if (!def_stmt
|| gimple_nop_p (def_stmt
))
480 def_bb
= gimple_bb (def_stmt
);
481 if (!flow_bb_inside_loop_p (loop
, def_bb
))
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
488 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
489 DEF_STMT must have already been processed, because this should be the
490 only way that STMT, which is a reduction-phi, was put in the worklist,
491 as there should be no other uses for DEF_STMT in the loop. So we just
492 check that everything is as expected, and we are done. */
493 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
494 bb
= gimple_bb (stmt
);
495 if (gimple_code (stmt
) == GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
497 && gimple_code (def_stmt
) != GIMPLE_PHI
498 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
499 && bb
->loop_father
== def_bb
->loop_father
)
501 if (dump_enabled_p ())
502 dump_printf_loc (MSG_NOTE
, vect_location
,
503 "reduc-stmt defining reduc-phi in the same nest.\n");
504 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
505 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
506 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
507 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
508 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
512 /* case 3a: outer-loop stmt defining an inner-loop stmt:
513 outer-loop-header-bb:
519 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
521 if (dump_enabled_p ())
522 dump_printf_loc (MSG_NOTE
, vect_location
,
523 "outer-loop def-stmt defining inner-loop stmt.\n");
527 case vect_unused_in_scope
:
528 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
529 vect_used_in_scope
: vect_unused_in_scope
;
532 case vect_used_in_outer_by_reduction
:
533 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
534 relevant
= vect_used_by_reduction
;
537 case vect_used_in_outer
:
538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
539 relevant
= vect_used_in_scope
;
542 case vect_used_in_scope
:
550 /* case 3b: inner-loop stmt defining an outer-loop stmt:
551 outer-loop-header-bb:
555 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
557 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
559 if (dump_enabled_p ())
560 dump_printf_loc (MSG_NOTE
, vect_location
,
561 "inner-loop def-stmt defining outer-loop stmt.\n");
565 case vect_unused_in_scope
:
566 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
567 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
568 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
571 case vect_used_by_reduction
:
572 case vect_used_only_live
:
573 relevant
= vect_used_in_outer_by_reduction
;
576 case vect_used_in_scope
:
577 relevant
= vect_used_in_outer
;
584 /* We are also not interested in uses on loop PHI backedges that are
585 inductions. Otherwise we'll needlessly vectorize the IV increment
586 and cause hybrid SLP for SLP inductions. Unless the PHI is live
588 else if (gimple_code (stmt
) == GIMPLE_PHI
589 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
590 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
591 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
594 if (dump_enabled_p ())
595 dump_printf_loc (MSG_NOTE
, vect_location
,
596 "induction value on backedge.\n");
601 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
606 /* Function vect_mark_stmts_to_be_vectorized.
608 Not all stmts in the loop need to be vectorized. For example:
617 Stmt 1 and 3 do not need to be vectorized, because loop control and
618 addressing of vectorized data-refs are handled differently.
620 This pass detects such stmts. */
623 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
625 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
626 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
627 unsigned int nbbs
= loop
->num_nodes
;
628 gimple_stmt_iterator si
;
631 stmt_vec_info stmt_vinfo
;
635 enum vect_relevant relevant
;
637 if (dump_enabled_p ())
638 dump_printf_loc (MSG_NOTE
, vect_location
,
639 "=== vect_mark_stmts_to_be_vectorized ===\n");
641 auto_vec
<gimple
*, 64> worklist
;
643 /* 1. Init worklist. */
644 for (i
= 0; i
< nbbs
; i
++)
647 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
650 if (dump_enabled_p ())
652 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
653 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
656 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
657 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
659 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
661 stmt
= gsi_stmt (si
);
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
665 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
668 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
669 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
673 /* 2. Process_worklist */
674 while (worklist
.length () > 0)
679 stmt
= worklist
.pop ();
680 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
683 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
686 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
687 (DEF_STMT) as relevant/irrelevant according to the relevance property
689 stmt_vinfo
= vinfo_for_stmt (stmt
);
690 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
692 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
693 propagated as is to the DEF_STMTs of its USEs.
695 One exception is when STMT has been identified as defining a reduction
696 variable; in this case we set the relevance to vect_used_by_reduction.
697 This is because we distinguish between two kinds of relevant stmts -
698 those that are used by a reduction computation, and those that are
699 (also) used by a regular computation. This allows us later on to
700 identify stmts that are used solely by a reduction, and therefore the
701 order of the results that they produce does not have to be kept. */
703 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
705 case vect_reduction_def
:
706 gcc_assert (relevant
!= vect_unused_in_scope
);
707 if (relevant
!= vect_unused_in_scope
708 && relevant
!= vect_used_in_scope
709 && relevant
!= vect_used_by_reduction
710 && relevant
!= vect_used_only_live
)
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
714 "unsupported use of reduction.\n");
719 case vect_nested_cycle
:
720 if (relevant
!= vect_unused_in_scope
721 && relevant
!= vect_used_in_outer_by_reduction
722 && relevant
!= vect_used_in_outer
)
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
726 "unsupported use of nested cycle.\n");
732 case vect_double_reduction_def
:
733 if (relevant
!= vect_unused_in_scope
734 && relevant
!= vect_used_by_reduction
735 && relevant
!= vect_used_only_live
)
737 if (dump_enabled_p ())
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
739 "unsupported use of double reduction.\n");
749 if (is_pattern_stmt_p (stmt_vinfo
))
751 /* Pattern statements are not inserted into the code, so
752 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
753 have to scan the RHS or function arguments instead. */
754 if (is_gimple_assign (stmt
))
756 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
757 tree op
= gimple_assign_rhs1 (stmt
);
760 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
762 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
763 relevant
, &worklist
, false)
764 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
765 relevant
, &worklist
, false))
769 for (; i
< gimple_num_ops (stmt
); i
++)
771 op
= gimple_op (stmt
, i
);
772 if (TREE_CODE (op
) == SSA_NAME
773 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
778 else if (is_gimple_call (stmt
))
780 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
782 tree arg
= gimple_call_arg (stmt
, i
);
783 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
790 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
792 tree op
= USE_FROM_PTR (use_p
);
793 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
798 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
800 gather_scatter_info gs_info
;
801 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
803 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
807 } /* while worklist */
813 /* Function vect_model_simple_cost.
815 Models cost for simple operations, i.e. those that only emit ncopies of a
816 single op. Right now, this does not account for multiple insns that could
817 be generated for the single vector op. We will handle that shortly. */
820 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
821 enum vect_def_type
*dt
,
823 stmt_vector_for_cost
*prologue_cost_vec
,
824 stmt_vector_for_cost
*body_cost_vec
)
827 int inside_cost
= 0, prologue_cost
= 0;
829 /* The SLP costs were already calculated during SLP tree build. */
830 if (PURE_SLP_STMT (stmt_info
))
833 /* Cost the "broadcast" of a scalar operand in to a vector operand.
834 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
836 for (i
= 0; i
< ndts
; i
++)
837 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
838 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
839 stmt_info
, 0, vect_prologue
);
841 /* Pass the inside-of-loop statements to the target-specific cost model. */
842 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
843 stmt_info
, 0, vect_body
);
845 if (dump_enabled_p ())
846 dump_printf_loc (MSG_NOTE
, vect_location
,
847 "vect_model_simple_cost: inside_cost = %d, "
848 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
852 /* Model cost for type demotion and promotion operations. PWR is normally
853 zero for single-step promotions and demotions. It will be one if
854 two-step promotion/demotion is required, and so on. Each additional
855 step doubles the number of instructions required. */
858 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
859 enum vect_def_type
*dt
, int pwr
)
862 int inside_cost
= 0, prologue_cost
= 0;
863 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
864 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
865 void *target_cost_data
;
867 /* The SLP costs were already calculated during SLP tree build. */
868 if (PURE_SLP_STMT (stmt_info
))
872 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
874 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
876 for (i
= 0; i
< pwr
+ 1; i
++)
878 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
880 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
881 vec_promote_demote
, stmt_info
, 0,
885 /* FORNOW: Assuming maximum 2 args per stmts. */
886 for (i
= 0; i
< 2; i
++)
887 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
888 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
889 stmt_info
, 0, vect_prologue
);
891 if (dump_enabled_p ())
892 dump_printf_loc (MSG_NOTE
, vect_location
,
893 "vect_model_promotion_demotion_cost: inside_cost = %d, "
894 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
897 /* Function vect_model_store_cost
899 Models cost for stores. In the case of grouped accesses, one access
900 has the overhead of the grouped access attributed to it. */
903 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
904 vect_memory_access_type memory_access_type
,
905 vec_load_store_type vls_type
, slp_tree slp_node
,
906 stmt_vector_for_cost
*prologue_cost_vec
,
907 stmt_vector_for_cost
*body_cost_vec
)
909 unsigned int inside_cost
= 0, prologue_cost
= 0;
910 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
911 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
912 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
914 if (vls_type
== VLS_STORE_INVARIANT
)
915 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
916 stmt_info
, 0, vect_prologue
);
918 /* Grouped stores update all elements in the group at once,
919 so we want the DR for the first statement. */
920 if (!slp_node
&& grouped_access_p
)
922 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
923 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
926 /* True if we should include any once-per-group costs as well as
927 the cost of the statement itself. For SLP we only get called
928 once per group anyhow. */
929 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
931 /* We assume that the cost of a single store-lanes instruction is
932 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
933 access is instead being provided by a permute-and-store operation,
934 include the cost of the permutes. */
936 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
938 /* Uses a high and low interleave or shuffle operations for each
940 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
941 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
942 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
943 stmt_info
, 0, vect_body
);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE
, vect_location
,
947 "vect_model_store_cost: strided group_size = %d .\n",
951 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
952 /* Costs of the stores. */
953 if (memory_access_type
== VMAT_ELEMENTWISE
954 || memory_access_type
== VMAT_GATHER_SCATTER
)
956 /* N scalar stores plus extracting the elements. */
957 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
958 inside_cost
+= record_stmt_cost (body_cost_vec
,
959 ncopies
* assumed_nunits
,
960 scalar_store
, stmt_info
, 0, vect_body
);
963 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
965 if (memory_access_type
== VMAT_ELEMENTWISE
966 || memory_access_type
== VMAT_STRIDED_SLP
)
968 /* N scalar stores plus extracting the elements. */
969 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
970 inside_cost
+= record_stmt_cost (body_cost_vec
,
971 ncopies
* assumed_nunits
,
972 vec_to_scalar
, stmt_info
, 0, vect_body
);
975 if (dump_enabled_p ())
976 dump_printf_loc (MSG_NOTE
, vect_location
,
977 "vect_model_store_cost: inside_cost = %d, "
978 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
982 /* Calculate cost of DR's memory access. */
984 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
985 unsigned int *inside_cost
,
986 stmt_vector_for_cost
*body_cost_vec
)
988 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
989 gimple
*stmt
= DR_STMT (dr
);
990 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
992 switch (alignment_support_scheme
)
996 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
997 vector_store
, stmt_info
, 0,
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE
, vect_location
,
1002 "vect_model_store_cost: aligned.\n");
1006 case dr_unaligned_supported
:
1008 /* Here, we assign an additional cost for the unaligned store. */
1009 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1010 unaligned_store
, stmt_info
,
1011 DR_MISALIGNMENT (dr
), vect_body
);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE
, vect_location
,
1014 "vect_model_store_cost: unaligned supported by "
1019 case dr_unaligned_unsupported
:
1021 *inside_cost
= VECT_MAX_COST
;
1023 if (dump_enabled_p ())
1024 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1025 "vect_model_store_cost: unsupported access.\n");
1035 /* Function vect_model_load_cost
1037 Models cost for loads. In the case of grouped accesses, one access has
1038 the overhead of the grouped access attributed to it. Since unaligned
1039 accesses are supported for loads, we also account for the costs of the
1040 access scheme chosen. */
1043 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1044 vect_memory_access_type memory_access_type
,
1046 stmt_vector_for_cost
*prologue_cost_vec
,
1047 stmt_vector_for_cost
*body_cost_vec
)
1049 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1050 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1051 unsigned int inside_cost
= 0, prologue_cost
= 0;
1052 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1054 /* Grouped loads read all elements in the group at once,
1055 so we want the DR for the first statement. */
1056 if (!slp_node
&& grouped_access_p
)
1058 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1059 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1062 /* True if we should include any once-per-group costs as well as
1063 the cost of the statement itself. For SLP we only get called
1064 once per group anyhow. */
1065 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1067 /* We assume that the cost of a single load-lanes instruction is
1068 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1069 access is instead being provided by a load-and-permute operation,
1070 include the cost of the permutes. */
1072 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1074 /* Uses an even and odd extract operations or shuffle operations
1075 for each needed permute. */
1076 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1077 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1078 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1079 stmt_info
, 0, vect_body
);
1081 if (dump_enabled_p ())
1082 dump_printf_loc (MSG_NOTE
, vect_location
,
1083 "vect_model_load_cost: strided group_size = %d .\n",
1087 /* The loads themselves. */
1088 if (memory_access_type
== VMAT_ELEMENTWISE
1089 || memory_access_type
== VMAT_GATHER_SCATTER
)
1091 /* N scalar loads plus gathering them into a vector. */
1092 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1093 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1094 inside_cost
+= record_stmt_cost (body_cost_vec
,
1095 ncopies
* assumed_nunits
,
1096 scalar_load
, stmt_info
, 0, vect_body
);
1099 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1100 &inside_cost
, &prologue_cost
,
1101 prologue_cost_vec
, body_cost_vec
, true);
1102 if (memory_access_type
== VMAT_ELEMENTWISE
1103 || memory_access_type
== VMAT_STRIDED_SLP
)
1104 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1105 stmt_info
, 0, vect_body
);
1107 if (dump_enabled_p ())
1108 dump_printf_loc (MSG_NOTE
, vect_location
,
1109 "vect_model_load_cost: inside_cost = %d, "
1110 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1114 /* Calculate cost of DR's memory access. */
1116 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1117 bool add_realign_cost
, unsigned int *inside_cost
,
1118 unsigned int *prologue_cost
,
1119 stmt_vector_for_cost
*prologue_cost_vec
,
1120 stmt_vector_for_cost
*body_cost_vec
,
1121 bool record_prologue_costs
)
1123 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1124 gimple
*stmt
= DR_STMT (dr
);
1125 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1127 switch (alignment_support_scheme
)
1131 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1132 stmt_info
, 0, vect_body
);
1134 if (dump_enabled_p ())
1135 dump_printf_loc (MSG_NOTE
, vect_location
,
1136 "vect_model_load_cost: aligned.\n");
1140 case dr_unaligned_supported
:
1142 /* Here, we assign an additional cost for the unaligned load. */
1143 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1144 unaligned_load
, stmt_info
,
1145 DR_MISALIGNMENT (dr
), vect_body
);
1147 if (dump_enabled_p ())
1148 dump_printf_loc (MSG_NOTE
, vect_location
,
1149 "vect_model_load_cost: unaligned supported by "
1154 case dr_explicit_realign
:
1156 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1157 vector_load
, stmt_info
, 0, vect_body
);
1158 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1159 vec_perm
, stmt_info
, 0, vect_body
);
1161 /* FIXME: If the misalignment remains fixed across the iterations of
1162 the containing loop, the following cost should be added to the
1164 if (targetm
.vectorize
.builtin_mask_for_load
)
1165 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1166 stmt_info
, 0, vect_body
);
1168 if (dump_enabled_p ())
1169 dump_printf_loc (MSG_NOTE
, vect_location
,
1170 "vect_model_load_cost: explicit realign\n");
1174 case dr_explicit_realign_optimized
:
1176 if (dump_enabled_p ())
1177 dump_printf_loc (MSG_NOTE
, vect_location
,
1178 "vect_model_load_cost: unaligned software "
1181 /* Unaligned software pipeline has a load of an address, an initial
1182 load, and possibly a mask operation to "prime" the loop. However,
1183 if this is an access in a group of loads, which provide grouped
1184 access, then the above cost should only be considered for one
1185 access in the group. Inside the loop, there is a load op
1186 and a realignment op. */
1188 if (add_realign_cost
&& record_prologue_costs
)
1190 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1191 vector_stmt
, stmt_info
,
1193 if (targetm
.vectorize
.builtin_mask_for_load
)
1194 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1195 vector_stmt
, stmt_info
,
1199 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1200 stmt_info
, 0, vect_body
);
1201 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1202 stmt_info
, 0, vect_body
);
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE
, vect_location
,
1206 "vect_model_load_cost: explicit realign optimized"
1212 case dr_unaligned_unsupported
:
1214 *inside_cost
= VECT_MAX_COST
;
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1218 "vect_model_load_cost: unsupported access.\n");
1227 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1228 the loop preheader for the vectorized stmt STMT. */
1231 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1234 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1237 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1238 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1242 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1246 if (nested_in_vect_loop_p (loop
, stmt
))
1249 pe
= loop_preheader_edge (loop
);
1250 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1251 gcc_assert (!new_bb
);
1255 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1257 gimple_stmt_iterator gsi_bb_start
;
1259 gcc_assert (bb_vinfo
);
1260 bb
= BB_VINFO_BB (bb_vinfo
);
1261 gsi_bb_start
= gsi_after_labels (bb
);
1262 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1266 if (dump_enabled_p ())
1268 dump_printf_loc (MSG_NOTE
, vect_location
,
1269 "created new init_stmt: ");
1270 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1274 /* Function vect_init_vector.
1276 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1277 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1278 vector type a vector with all elements equal to VAL is created first.
1279 Place the initialization at BSI if it is not NULL. Otherwise, place the
1280 initialization at the loop preheader.
1281 Return the DEF of INIT_STMT.
1282 It will be used in the vectorization of STMT. */
1285 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1290 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1291 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1293 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1294 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1296 /* Scalar boolean value should be transformed into
1297 all zeros or all ones value before building a vector. */
1298 if (VECTOR_BOOLEAN_TYPE_P (type
))
1300 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1301 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1303 if (CONSTANT_CLASS_P (val
))
1304 val
= integer_zerop (val
) ? false_val
: true_val
;
1307 new_temp
= make_ssa_name (TREE_TYPE (type
));
1308 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1309 val
, true_val
, false_val
);
1310 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1314 else if (CONSTANT_CLASS_P (val
))
1315 val
= fold_convert (TREE_TYPE (type
), val
);
1318 new_temp
= make_ssa_name (TREE_TYPE (type
));
1319 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1320 init_stmt
= gimple_build_assign (new_temp
,
1321 fold_build1 (VIEW_CONVERT_EXPR
,
1325 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1326 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1330 val
= build_vector_from_val (type
, val
);
1333 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1334 init_stmt
= gimple_build_assign (new_temp
, val
);
1335 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1339 /* Function vect_get_vec_def_for_operand_1.
1341 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1342 DT that will be used in the vectorized stmt. */
1345 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1349 stmt_vec_info def_stmt_info
= NULL
;
1353 /* operand is a constant or a loop invariant. */
1354 case vect_constant_def
:
1355 case vect_external_def
:
1356 /* Code should use vect_get_vec_def_for_operand. */
1359 /* operand is defined inside the loop. */
1360 case vect_internal_def
:
1362 /* Get the def from the vectorized stmt. */
1363 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1365 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1366 /* Get vectorized pattern statement. */
1368 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1369 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1370 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1371 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1372 gcc_assert (vec_stmt
);
1373 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1374 vec_oprnd
= PHI_RESULT (vec_stmt
);
1375 else if (is_gimple_call (vec_stmt
))
1376 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1378 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1382 /* operand is defined by a loop header phi. */
1383 case vect_reduction_def
:
1384 case vect_double_reduction_def
:
1385 case vect_nested_cycle
:
1386 case vect_induction_def
:
1388 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1390 /* Get the def from the vectorized stmt. */
1391 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1392 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1393 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1394 vec_oprnd
= PHI_RESULT (vec_stmt
);
1396 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1406 /* Function vect_get_vec_def_for_operand.
1408 OP is an operand in STMT. This function returns a (vector) def that will be
1409 used in the vectorized stmt for STMT.
1411 In the case that OP is an SSA_NAME which is defined in the loop, then
1412 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1414 In case OP is an invariant or constant, a new stmt that creates a vector def
1415 needs to be introduced. VECTYPE may be used to specify a required type for
1416 vector invariant. */
1419 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1422 enum vect_def_type dt
;
1424 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1425 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1427 if (dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE
, vect_location
,
1430 "vect_get_vec_def_for_operand: ");
1431 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1432 dump_printf (MSG_NOTE
, "\n");
1435 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1436 gcc_assert (is_simple_use
);
1437 if (def_stmt
&& dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1440 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1443 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1445 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1449 vector_type
= vectype
;
1450 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1451 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1452 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1454 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1456 gcc_assert (vector_type
);
1457 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1460 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1464 /* Function vect_get_vec_def_for_stmt_copy
1466 Return a vector-def for an operand. This function is used when the
1467 vectorized stmt to be created (by the caller to this function) is a "copy"
1468 created in case the vectorized result cannot fit in one vector, and several
1469 copies of the vector-stmt are required. In this case the vector-def is
1470 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1471 of the stmt that defines VEC_OPRND.
1472 DT is the type of the vector def VEC_OPRND.
1475 In case the vectorization factor (VF) is bigger than the number
1476 of elements that can fit in a vectype (nunits), we have to generate
1477 more than one vector stmt to vectorize the scalar stmt. This situation
1478 arises when there are multiple data-types operated upon in the loop; the
1479 smallest data-type determines the VF, and as a result, when vectorizing
1480 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1481 vector stmt (each computing a vector of 'nunits' results, and together
1482 computing 'VF' results in each iteration). This function is called when
1483 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1484 which VF=16 and nunits=4, so the number of copies required is 4):
1486 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1488 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1489 VS1.1: vx.1 = memref1 VS1.2
1490 VS1.2: vx.2 = memref2 VS1.3
1491 VS1.3: vx.3 = memref3
1493 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1494 VSnew.1: vz1 = vx.1 + ... VSnew.2
1495 VSnew.2: vz2 = vx.2 + ... VSnew.3
1496 VSnew.3: vz3 = vx.3 + ...
1498 The vectorization of S1 is explained in vectorizable_load.
1499 The vectorization of S2:
1500 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1501 the function 'vect_get_vec_def_for_operand' is called to
1502 get the relevant vector-def for each operand of S2. For operand x it
1503 returns the vector-def 'vx.0'.
1505 To create the remaining copies of the vector-stmt (VSnew.j), this
1506 function is called to get the relevant vector-def for each operand. It is
1507 obtained from the respective VS1.j stmt, which is recorded in the
1508 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1510 For example, to obtain the vector-def 'vx.1' in order to create the
1511 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1512 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1513 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1514 and return its def ('vx.1').
1515 Overall, to create the above sequence this function will be called 3 times:
1516 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1517 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1518 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1521 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1523 gimple
*vec_stmt_for_operand
;
1524 stmt_vec_info def_stmt_info
;
1526 /* Do nothing; can reuse same def. */
1527 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1530 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1531 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1532 gcc_assert (def_stmt_info
);
1533 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1534 gcc_assert (vec_stmt_for_operand
);
1535 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1536 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1538 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1543 /* Get vectorized definitions for the operands to create a copy of an original
1544 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1547 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1548 vec
<tree
> *vec_oprnds0
,
1549 vec
<tree
> *vec_oprnds1
)
1551 tree vec_oprnd
= vec_oprnds0
->pop ();
1553 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1554 vec_oprnds0
->quick_push (vec_oprnd
);
1556 if (vec_oprnds1
&& vec_oprnds1
->length ())
1558 vec_oprnd
= vec_oprnds1
->pop ();
1559 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1560 vec_oprnds1
->quick_push (vec_oprnd
);
1565 /* Get vectorized definitions for OP0 and OP1. */
1568 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1569 vec
<tree
> *vec_oprnds0
,
1570 vec
<tree
> *vec_oprnds1
,
1575 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1576 auto_vec
<tree
> ops (nops
);
1577 auto_vec
<vec
<tree
> > vec_defs (nops
);
1579 ops
.quick_push (op0
);
1581 ops
.quick_push (op1
);
1583 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1585 *vec_oprnds0
= vec_defs
[0];
1587 *vec_oprnds1
= vec_defs
[1];
1593 vec_oprnds0
->create (1);
1594 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1595 vec_oprnds0
->quick_push (vec_oprnd
);
1599 vec_oprnds1
->create (1);
1600 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1601 vec_oprnds1
->quick_push (vec_oprnd
);
1606 /* Helper function called by vect_finish_replace_stmt and
1607 vect_finish_stmt_generation. Set the location of the new
1608 statement and create a stmt_vec_info for it. */
1611 vect_finish_stmt_generation_1 (gimple
*stmt
, gimple
*vec_stmt
)
1613 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1614 vec_info
*vinfo
= stmt_info
->vinfo
;
1616 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1618 if (dump_enabled_p ())
1620 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1621 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1624 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1626 /* While EH edges will generally prevent vectorization, stmt might
1627 e.g. be in a must-not-throw region. Ensure newly created stmts
1628 that could throw are part of the same region. */
1629 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1630 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1631 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1634 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1635 which sets the same scalar result as STMT did. */
1638 vect_finish_replace_stmt (gimple
*stmt
, gimple
*vec_stmt
)
1640 gcc_assert (gimple_get_lhs (stmt
) == gimple_get_lhs (vec_stmt
));
1642 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1643 gsi_replace (&gsi
, vec_stmt
, false);
1645 vect_finish_stmt_generation_1 (stmt
, vec_stmt
);
1648 /* Function vect_finish_stmt_generation.
1650 Insert a new stmt. */
1653 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1654 gimple_stmt_iterator
*gsi
)
1656 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1658 if (!gsi_end_p (*gsi
)
1659 && gimple_has_mem_ops (vec_stmt
))
1661 gimple
*at_stmt
= gsi_stmt (*gsi
);
1662 tree vuse
= gimple_vuse (at_stmt
);
1663 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1665 tree vdef
= gimple_vdef (at_stmt
);
1666 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1667 /* If we have an SSA vuse and insert a store, update virtual
1668 SSA form to avoid triggering the renamer. Do so only
1669 if we can easily see all uses - which is what almost always
1670 happens with the way vectorized stmts are inserted. */
1671 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1672 && ((is_gimple_assign (vec_stmt
)
1673 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1674 || (is_gimple_call (vec_stmt
)
1675 && !(gimple_call_flags (vec_stmt
)
1676 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1678 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1679 gimple_set_vdef (vec_stmt
, new_vdef
);
1680 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1684 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1685 vect_finish_stmt_generation_1 (stmt
, vec_stmt
);
1688 /* We want to vectorize a call to combined function CFN with function
1689 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1690 as the types of all inputs. Check whether this is possible using
1691 an internal function, returning its code if so or IFN_LAST if not. */
1694 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1695 tree vectype_out
, tree vectype_in
)
1698 if (internal_fn_p (cfn
))
1699 ifn
= as_internal_fn (cfn
);
1701 ifn
= associated_internal_fn (fndecl
);
1702 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1704 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1705 if (info
.vectorizable
)
1707 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1708 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1709 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1710 OPTIMIZE_FOR_SPEED
))
1718 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1719 gimple_stmt_iterator
*);
1721 /* Check whether a load or store statement in the loop described by
1722 LOOP_VINFO is possible in a fully-masked loop. This is testing
1723 whether the vectorizer pass has the appropriate support, as well as
1724 whether the target does.
1726 VLS_TYPE says whether the statement is a load or store and VECTYPE
1727 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1728 says how the load or store is going to be implemented and GROUP_SIZE
1729 is the number of load or store statements in the containing group.
1731 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1732 supported, otherwise record the required mask types. */
1735 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1736 vec_load_store_type vls_type
, int group_size
,
1737 vect_memory_access_type memory_access_type
)
1739 /* Invariant loads need no special support. */
1740 if (memory_access_type
== VMAT_INVARIANT
)
1743 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1744 machine_mode vecmode
= TYPE_MODE (vectype
);
1745 bool is_load
= (vls_type
== VLS_LOAD
);
1746 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1749 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1750 : !vect_store_lanes_supported (vectype
, group_size
, true))
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1754 "can't use a fully-masked loop because the"
1755 " target doesn't have an appropriate masked"
1756 " load/store-lanes instruction.\n");
1757 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1760 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1761 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1765 if (memory_access_type
!= VMAT_CONTIGUOUS
1766 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1768 /* Element X of the data must come from iteration i * VF + X of the
1769 scalar loop. We need more work to support other mappings. */
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1772 "can't use a fully-masked loop because an access"
1773 " isn't contiguous.\n");
1774 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1778 machine_mode mask_mode
;
1779 if (!(targetm
.vectorize
.get_mask_mode
1780 (GET_MODE_NUNITS (vecmode
),
1781 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1782 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1786 "can't use a fully-masked loop because the target"
1787 " doesn't have the appropriate masked load or"
1789 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1792 /* We might load more scalars than we need for permuting SLP loads.
1793 We checked in get_group_load_store_type that the extra elements
1794 don't leak into a new vector. */
1795 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1796 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1797 unsigned int nvectors
;
1798 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1799 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
);
1804 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1805 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1806 that needs to be applied to all loads and stores in a vectorized loop.
1807 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1809 MASK_TYPE is the type of both masks. If new statements are needed,
1810 insert them before GSI. */
1813 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1814 gimple_stmt_iterator
*gsi
)
1816 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1820 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1821 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1822 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1823 vec_mask
, loop_mask
);
1824 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1828 /* STMT is a non-strided load or store, meaning that it accesses
1829 elements with a known constant step. Return -1 if that step
1830 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1833 compare_step_with_zero (gimple
*stmt
)
1835 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1836 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1837 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
1841 /* If the target supports a permute mask that reverses the elements in
1842 a vector of type VECTYPE, return that mask, otherwise return null. */
1845 perm_mask_for_reverse (tree vectype
)
1847 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1849 /* The encoding has a single stepped pattern. */
1850 vec_perm_builder
sel (nunits
, 1, 3);
1851 for (int i
= 0; i
< 3; ++i
)
1852 sel
.quick_push (nunits
- 1 - i
);
1854 vec_perm_indices
indices (sel
, 1, nunits
);
1855 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1857 return vect_gen_perm_mask_checked (vectype
, indices
);
1860 /* STMT is either a masked or unconditional store. Return the value
1864 vect_get_store_rhs (gimple
*stmt
)
1866 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt
))
1868 gcc_assert (gimple_assign_single_p (assign
));
1869 return gimple_assign_rhs1 (assign
);
1871 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
1873 internal_fn ifn
= gimple_call_internal_fn (call
);
1874 gcc_assert (ifn
== IFN_MASK_STORE
);
1875 return gimple_call_arg (stmt
, 3);
1880 /* A subroutine of get_load_store_type, with a subset of the same
1881 arguments. Handle the case where STMT is part of a grouped load
1884 For stores, the statements in the group are all consecutive
1885 and there is no gap at the end. For loads, the statements in the
1886 group might not be consecutive; there can be gaps between statements
1887 as well as at the end. */
1890 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1891 bool masked_p
, vec_load_store_type vls_type
,
1892 vect_memory_access_type
*memory_access_type
)
1894 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1895 vec_info
*vinfo
= stmt_info
->vinfo
;
1896 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1897 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1898 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1899 data_reference
*first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1900 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1901 bool single_element_p
= (stmt
== first_stmt
1902 && !GROUP_NEXT_ELEMENT (stmt_info
));
1903 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1904 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1906 /* True if the vectorized statements would access beyond the last
1907 statement in the group. */
1908 bool overrun_p
= false;
1910 /* True if we can cope with such overrun by peeling for gaps, so that
1911 there is at least one final scalar iteration after the vector loop. */
1912 bool can_overrun_p
= (!masked_p
1913 && vls_type
== VLS_LOAD
1917 /* There can only be a gap at the end of the group if the stride is
1918 known at compile time. */
1919 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1921 /* Stores can't yet have gaps. */
1922 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
1926 if (STMT_VINFO_STRIDED_P (stmt_info
))
1928 /* Try to use consecutive accesses of GROUP_SIZE elements,
1929 separated by the stride, until we have a complete vector.
1930 Fall back to scalar accesses if that isn't possible. */
1931 if (multiple_p (nunits
, group_size
))
1932 *memory_access_type
= VMAT_STRIDED_SLP
;
1934 *memory_access_type
= VMAT_ELEMENTWISE
;
1938 overrun_p
= loop_vinfo
&& gap
!= 0;
1939 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1942 "Grouped store with gaps requires"
1943 " non-consecutive accesses\n");
1946 /* An overrun is fine if the trailing elements are smaller
1947 than the alignment boundary B. Every vector access will
1948 be a multiple of B and so we are guaranteed to access a
1949 non-gap element in the same B-sized block. */
1951 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1952 / vect_get_scalar_dr_size (first_dr
)))
1954 if (overrun_p
&& !can_overrun_p
)
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1958 "Peeling for outer loop is not supported\n");
1961 *memory_access_type
= VMAT_CONTIGUOUS
;
1966 /* We can always handle this case using elementwise accesses,
1967 but see if something more efficient is available. */
1968 *memory_access_type
= VMAT_ELEMENTWISE
;
1970 /* If there is a gap at the end of the group then these optimizations
1971 would access excess elements in the last iteration. */
1972 bool would_overrun_p
= (gap
!= 0);
1973 /* An overrun is fine if the trailing elements are smaller than the
1974 alignment boundary B. Every vector access will be a multiple of B
1975 and so we are guaranteed to access a non-gap element in the
1976 same B-sized block. */
1979 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1980 / vect_get_scalar_dr_size (first_dr
)))
1981 would_overrun_p
= false;
1983 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1984 && (can_overrun_p
|| !would_overrun_p
)
1985 && compare_step_with_zero (stmt
) > 0)
1987 /* First cope with the degenerate case of a single-element
1989 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
1990 *memory_access_type
= VMAT_CONTIGUOUS
;
1992 /* Otherwise try using LOAD/STORE_LANES. */
1993 if (*memory_access_type
== VMAT_ELEMENTWISE
1994 && (vls_type
== VLS_LOAD
1995 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
1996 : vect_store_lanes_supported (vectype
, group_size
,
1999 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2000 overrun_p
= would_overrun_p
;
2003 /* If that fails, try using permuting loads. */
2004 if (*memory_access_type
== VMAT_ELEMENTWISE
2005 && (vls_type
== VLS_LOAD
2006 ? vect_grouped_load_supported (vectype
, single_element_p
,
2008 : vect_grouped_store_supported (vectype
, group_size
)))
2010 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2011 overrun_p
= would_overrun_p
;
2016 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
2018 /* STMT is the leader of the group. Check the operands of all the
2019 stmts of the group. */
2020 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
2023 tree op
= vect_get_store_rhs (next_stmt
);
2025 enum vect_def_type dt
;
2026 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2030 "use not simple.\n");
2033 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
2039 gcc_assert (can_overrun_p
);
2040 if (dump_enabled_p ())
2041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2042 "Data access with gaps requires scalar "
2044 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2050 /* A subroutine of get_load_store_type, with a subset of the same
2051 arguments. Handle the case where STMT is a load or store that
2052 accesses consecutive elements with a negative step. */
2054 static vect_memory_access_type
2055 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
2056 vec_load_store_type vls_type
,
2057 unsigned int ncopies
)
2059 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2060 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2061 dr_alignment_support alignment_support_scheme
;
2065 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2067 "multiple types with negative step.\n");
2068 return VMAT_ELEMENTWISE
;
2071 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
2072 if (alignment_support_scheme
!= dr_aligned
2073 && alignment_support_scheme
!= dr_unaligned_supported
)
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2077 "negative step but alignment required.\n");
2078 return VMAT_ELEMENTWISE
;
2081 if (vls_type
== VLS_STORE_INVARIANT
)
2083 if (dump_enabled_p ())
2084 dump_printf_loc (MSG_NOTE
, vect_location
,
2085 "negative step with invariant source;"
2086 " no permute needed.\n");
2087 return VMAT_CONTIGUOUS_DOWN
;
2090 if (!perm_mask_for_reverse (vectype
))
2092 if (dump_enabled_p ())
2093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2094 "negative step and reversing not supported.\n");
2095 return VMAT_ELEMENTWISE
;
2098 return VMAT_CONTIGUOUS_REVERSE
;
2101 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2102 if there is a memory access type that the vectorized form can use,
2103 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2104 or scatters, fill in GS_INFO accordingly.
2106 SLP says whether we're performing SLP rather than loop vectorization.
2107 MASKED_P is true if the statement is conditional on a vectorized mask.
2108 VECTYPE is the vector type that the vectorized statements will use.
2109 NCOPIES is the number of vector statements that will be needed. */
2112 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
, bool masked_p
,
2113 vec_load_store_type vls_type
, unsigned int ncopies
,
2114 vect_memory_access_type
*memory_access_type
,
2115 gather_scatter_info
*gs_info
)
2117 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2118 vec_info
*vinfo
= stmt_info
->vinfo
;
2119 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2120 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2121 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2123 *memory_access_type
= VMAT_GATHER_SCATTER
;
2125 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
2127 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
2128 &gs_info
->offset_dt
,
2129 &gs_info
->offset_vectype
))
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2133 "%s index use not simple.\n",
2134 vls_type
== VLS_LOAD
? "gather" : "scatter");
2138 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2140 if (!get_group_load_store_type (stmt
, vectype
, slp
, masked_p
, vls_type
,
2141 memory_access_type
))
2144 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2147 *memory_access_type
= VMAT_ELEMENTWISE
;
2151 int cmp
= compare_step_with_zero (stmt
);
2153 *memory_access_type
= get_negative_load_store_type
2154 (stmt
, vectype
, vls_type
, ncopies
);
2157 gcc_assert (vls_type
== VLS_LOAD
);
2158 *memory_access_type
= VMAT_INVARIANT
;
2161 *memory_access_type
= VMAT_CONTIGUOUS
;
2164 if ((*memory_access_type
== VMAT_ELEMENTWISE
2165 || *memory_access_type
== VMAT_STRIDED_SLP
)
2166 && !nunits
.is_constant ())
2168 if (dump_enabled_p ())
2169 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2170 "Not using elementwise accesses due to variable "
2171 "vectorization factor.\n");
2175 /* FIXME: At the moment the cost model seems to underestimate the
2176 cost of using elementwise accesses. This check preserves the
2177 traditional behavior until that can be fixed. */
2178 if (*memory_access_type
== VMAT_ELEMENTWISE
2179 && !STMT_VINFO_STRIDED_P (stmt_info
)
2180 && !(stmt
== GROUP_FIRST_ELEMENT (stmt_info
)
2181 && !GROUP_NEXT_ELEMENT (stmt_info
)
2182 && !pow2p_hwi (GROUP_SIZE (stmt_info
))))
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2186 "not falling back to elementwise accesses\n");
2192 /* Return true if boolean argument MASK is suitable for vectorizing
2193 conditional load or store STMT. When returning true, store the
2194 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2197 vect_check_load_store_mask (gimple
*stmt
, tree mask
, tree
*mask_vectype_out
)
2199 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2203 "mask argument is not a boolean.\n");
2207 if (TREE_CODE (mask
) != SSA_NAME
)
2209 if (dump_enabled_p ())
2210 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2211 "mask argument is not an SSA name.\n");
2215 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2217 enum vect_def_type dt
;
2219 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &def_stmt
, &dt
,
2222 if (dump_enabled_p ())
2223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2224 "mask use not simple.\n");
2228 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2230 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2232 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2234 if (dump_enabled_p ())
2235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2236 "could not find an appropriate vector mask type.\n");
2240 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2241 TYPE_VECTOR_SUBPARTS (vectype
)))
2243 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2246 "vector mask type ");
2247 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, mask_vectype
);
2248 dump_printf (MSG_MISSED_OPTIMIZATION
,
2249 " does not match vector data type ");
2250 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, vectype
);
2251 dump_printf (MSG_MISSED_OPTIMIZATION
, ".\n");
2256 *mask_vectype_out
= mask_vectype
;
2260 /* Return true if stored value RHS is suitable for vectorizing store
2261 statement STMT. When returning true, store the type of the
2262 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2263 store in *VLS_TYPE_OUT. */
2266 vect_check_store_rhs (gimple
*stmt
, tree rhs
, tree
*rhs_vectype_out
,
2267 vec_load_store_type
*vls_type_out
)
2269 /* In the case this is a store from a constant make sure
2270 native_encode_expr can handle it. */
2271 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2273 if (dump_enabled_p ())
2274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2275 "cannot encode constant as a byte sequence.\n");
2279 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2281 enum vect_def_type dt
;
2283 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &def_stmt
, &dt
,
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2288 "use not simple.\n");
2292 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2293 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2297 "incompatible vector types.\n");
2301 *rhs_vectype_out
= rhs_vectype
;
2302 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2303 *vls_type_out
= VLS_STORE_INVARIANT
;
2305 *vls_type_out
= VLS_STORE
;
2309 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2310 Note that we support masks with floating-point type, in which case the
2311 floats are interpreted as a bitmask. */
2314 vect_build_all_ones_mask (gimple
*stmt
, tree masktype
)
2316 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2317 return build_int_cst (masktype
, -1);
2318 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2320 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2321 mask
= build_vector_from_val (masktype
, mask
);
2322 return vect_init_vector (stmt
, mask
, masktype
, NULL
);
2324 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2328 for (int j
= 0; j
< 6; ++j
)
2330 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2331 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2332 mask
= build_vector_from_val (masktype
, mask
);
2333 return vect_init_vector (stmt
, mask
, masktype
, NULL
);
2338 /* Build an all-zero merge value of type VECTYPE while vectorizing
2339 STMT as a gather load. */
2342 vect_build_zero_merge_argument (gimple
*stmt
, tree vectype
)
2345 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2346 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2347 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2351 for (int j
= 0; j
< 6; ++j
)
2353 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2354 merge
= build_real (TREE_TYPE (vectype
), r
);
2358 merge
= build_vector_from_val (vectype
, merge
);
2359 return vect_init_vector (stmt
, merge
, vectype
, NULL
);
2362 /* Build a gather load call while vectorizing STMT. Insert new instructions
2363 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2364 operation. If the load is conditional, MASK is the unvectorized
2365 condition, otherwise MASK is null. */
2368 vect_build_gather_load_calls (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2369 gimple
**vec_stmt
, gather_scatter_info
*gs_info
,
2372 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2373 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2374 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2375 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2376 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2377 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2378 edge pe
= loop_preheader_edge (loop
);
2379 enum { NARROW
, NONE
, WIDEN
} modifier
;
2380 poly_uint64 gather_off_nunits
2381 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2383 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2384 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2385 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2386 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2387 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2388 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2389 tree scaletype
= TREE_VALUE (arglist
);
2390 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2391 && (!mask
|| types_compatible_p (srctype
, masktype
)));
2393 tree perm_mask
= NULL_TREE
;
2394 tree mask_perm_mask
= NULL_TREE
;
2395 if (known_eq (nunits
, gather_off_nunits
))
2397 else if (known_eq (nunits
* 2, gather_off_nunits
))
2401 /* Currently widening gathers and scatters are only supported for
2402 fixed-length vectors. */
2403 int count
= gather_off_nunits
.to_constant ();
2404 vec_perm_builder
sel (count
, count
, 1);
2405 for (int i
= 0; i
< count
; ++i
)
2406 sel
.quick_push (i
| (count
/ 2));
2408 vec_perm_indices
indices (sel
, 1, count
);
2409 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2412 else if (known_eq (nunits
, gather_off_nunits
* 2))
2416 /* Currently narrowing gathers and scatters are only supported for
2417 fixed-length vectors. */
2418 int count
= nunits
.to_constant ();
2419 vec_perm_builder
sel (count
, count
, 1);
2420 sel
.quick_grow (count
);
2421 for (int i
= 0; i
< count
; ++i
)
2422 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2423 vec_perm_indices
indices (sel
, 2, count
);
2424 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2430 for (int i
= 0; i
< count
; ++i
)
2431 sel
[i
] = i
| (count
/ 2);
2432 indices
.new_vector (sel
, 2, count
);
2433 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2439 tree vec_dest
= vect_create_destination_var (gimple_get_lhs (stmt
),
2442 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2443 if (!is_gimple_min_invariant (ptr
))
2446 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2447 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2448 gcc_assert (!new_bb
);
2451 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2453 tree vec_oprnd0
= NULL_TREE
;
2454 tree vec_mask
= NULL_TREE
;
2455 tree src_op
= NULL_TREE
;
2456 tree mask_op
= NULL_TREE
;
2457 tree prev_res
= NULL_TREE
;
2458 stmt_vec_info prev_stmt_info
= NULL
;
2462 src_op
= vect_build_zero_merge_argument (stmt
, rettype
);
2463 mask_op
= vect_build_all_ones_mask (stmt
, masktype
);
2466 for (int j
= 0; j
< ncopies
; ++j
)
2470 if (modifier
== WIDEN
&& (j
& 1))
2471 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2472 perm_mask
, stmt
, gsi
);
2475 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt
);
2478 = vect_get_vec_def_for_stmt_copy (gs_info
->offset_dt
, vec_oprnd0
);
2480 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2482 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2483 TYPE_VECTOR_SUBPARTS (idxtype
)));
2484 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2485 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2486 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2487 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2493 if (mask_perm_mask
&& (j
& 1))
2494 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2495 mask_perm_mask
, stmt
, gsi
);
2499 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2503 enum vect_def_type dt
;
2504 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2505 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2509 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2512 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
)),
2513 TYPE_VECTOR_SUBPARTS (masktype
)));
2514 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2515 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2516 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
2518 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2525 new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2528 if (!useless_type_conversion_p (vectype
, rettype
))
2530 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2531 TYPE_VECTOR_SUBPARTS (rettype
)));
2532 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2533 gimple_call_set_lhs (new_stmt
, op
);
2534 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2535 var
= make_ssa_name (vec_dest
);
2536 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2537 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2541 var
= make_ssa_name (vec_dest
, new_stmt
);
2542 gimple_call_set_lhs (new_stmt
, var
);
2545 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2547 if (modifier
== NARROW
)
2554 var
= permute_vec_elements (prev_res
, var
, perm_mask
, stmt
, gsi
);
2555 new_stmt
= SSA_NAME_DEF_STMT (var
);
2558 if (prev_stmt_info
== NULL
)
2559 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2561 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2562 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2566 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2569 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2570 gimple
**vec_stmt
, slp_tree slp_node
,
2571 tree vectype_in
, enum vect_def_type
*dt
)
2574 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2575 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2577 unsigned HOST_WIDE_INT nunits
, num_bytes
;
2579 op
= gimple_call_arg (stmt
, 0);
2580 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2582 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
2585 /* Multiple types in SLP are handled by creating the appropriate number of
2586 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2591 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2593 gcc_assert (ncopies
>= 1);
2595 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2599 if (!TYPE_VECTOR_SUBPARTS (char_vectype
).is_constant (&num_bytes
))
2602 unsigned word_bytes
= num_bytes
/ nunits
;
2604 /* The encoding uses one stepped pattern for each byte in the word. */
2605 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2606 for (unsigned i
= 0; i
< 3; ++i
)
2607 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2608 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2610 vec_perm_indices
indices (elts
, 1, num_bytes
);
2611 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2616 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2617 if (dump_enabled_p ())
2618 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2620 if (! PURE_SLP_STMT (stmt_info
))
2622 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2623 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2624 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2625 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2630 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2633 vec
<tree
> vec_oprnds
= vNULL
;
2634 gimple
*new_stmt
= NULL
;
2635 stmt_vec_info prev_stmt_info
= NULL
;
2636 for (unsigned j
= 0; j
< ncopies
; j
++)
2640 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
2642 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2644 /* Arguments are ready. create the new vector stmt. */
2647 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2649 tree tem
= make_ssa_name (char_vectype
);
2650 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2651 char_vectype
, vop
));
2652 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2653 tree tem2
= make_ssa_name (char_vectype
);
2654 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2655 tem
, tem
, bswap_vconst
);
2656 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2657 tem
= make_ssa_name (vectype
);
2658 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2660 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2662 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2669 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2671 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2673 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2676 vec_oprnds
.release ();
2680 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2681 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2682 in a single step. On success, store the binary pack code in
2686 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2687 tree_code
*convert_code
)
2689 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2690 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2694 int multi_step_cvt
= 0;
2695 auto_vec
<tree
, 8> interm_types
;
2696 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2697 &code
, &multi_step_cvt
,
2702 *convert_code
= code
;
2706 /* Function vectorizable_call.
2708 Check if GS performs a function call that can be vectorized.
2709 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2710 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2714 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2721 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2722 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2723 tree vectype_out
, vectype_in
;
2724 poly_uint64 nunits_in
;
2725 poly_uint64 nunits_out
;
2726 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2727 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2728 vec_info
*vinfo
= stmt_info
->vinfo
;
2729 tree fndecl
, new_temp
, rhs_type
;
2731 enum vect_def_type dt
[3]
2732 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2734 gimple
*new_stmt
= NULL
;
2736 vec
<tree
> vargs
= vNULL
;
2737 enum { NARROW
, NONE
, WIDEN
} modifier
;
2741 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2744 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2748 /* Is GS a vectorizable call? */
2749 stmt
= dyn_cast
<gcall
*> (gs
);
2753 if (gimple_call_internal_p (stmt
)
2754 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2755 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2756 /* Handled by vectorizable_load and vectorizable_store. */
2759 if (gimple_call_lhs (stmt
) == NULL_TREE
2760 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2763 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2765 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2767 /* Process function arguments. */
2768 rhs_type
= NULL_TREE
;
2769 vectype_in
= NULL_TREE
;
2770 nargs
= gimple_call_num_args (stmt
);
2772 /* Bail out if the function has more than three arguments, we do not have
2773 interesting builtin functions to vectorize with more than two arguments
2774 except for fma. No arguments is also not good. */
2775 if (nargs
== 0 || nargs
> 3)
2778 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2779 if (gimple_call_internal_p (stmt
)
2780 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2783 rhs_type
= unsigned_type_node
;
2786 for (i
= 0; i
< nargs
; i
++)
2790 op
= gimple_call_arg (stmt
, i
);
2792 /* We can only handle calls with arguments of the same type. */
2794 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2796 if (dump_enabled_p ())
2797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2798 "argument types differ.\n");
2802 rhs_type
= TREE_TYPE (op
);
2804 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2806 if (dump_enabled_p ())
2807 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2808 "use not simple.\n");
2813 vectype_in
= opvectype
;
2815 && opvectype
!= vectype_in
)
2817 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2819 "argument vector types differ.\n");
2823 /* If all arguments are external or constant defs use a vector type with
2824 the same size as the output vector type. */
2826 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2828 gcc_assert (vectype_in
);
2831 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2834 "no vectype for scalar type ");
2835 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2836 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2843 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2844 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2845 if (known_eq (nunits_in
* 2, nunits_out
))
2847 else if (known_eq (nunits_out
, nunits_in
))
2849 else if (known_eq (nunits_out
* 2, nunits_in
))
2854 /* We only handle functions that do not read or clobber memory. */
2855 if (gimple_vuse (stmt
))
2857 if (dump_enabled_p ())
2858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2859 "function reads from or writes to memory.\n");
2863 /* For now, we only vectorize functions if a target specific builtin
2864 is available. TODO -- in some cases, it might be profitable to
2865 insert the calls for pieces of the vector, in order to be able
2866 to vectorize other operations in the loop. */
2868 internal_fn ifn
= IFN_LAST
;
2869 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2870 tree callee
= gimple_call_fndecl (stmt
);
2872 /* First try using an internal function. */
2873 tree_code convert_code
= ERROR_MARK
;
2875 && (modifier
== NONE
2876 || (modifier
== NARROW
2877 && simple_integer_narrowing (vectype_out
, vectype_in
,
2879 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2882 /* If that fails, try asking for a target-specific built-in function. */
2883 if (ifn
== IFN_LAST
)
2885 if (cfn
!= CFN_LAST
)
2886 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2887 (cfn
, vectype_out
, vectype_in
);
2889 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2890 (callee
, vectype_out
, vectype_in
);
2893 if (ifn
== IFN_LAST
&& !fndecl
)
2895 if (cfn
== CFN_GOMP_SIMD_LANE
2898 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2899 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2900 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2901 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2903 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2904 { 0, 1, 2, ... vf - 1 } vector. */
2905 gcc_assert (nargs
== 0);
2907 else if (modifier
== NONE
2908 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2909 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2910 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2911 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2915 if (dump_enabled_p ())
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2917 "function is not vectorizable.\n");
2924 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2925 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
2927 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
2929 /* Sanity check: make sure that at least one copy of the vectorized stmt
2930 needs to be generated. */
2931 gcc_assert (ncopies
>= 1);
2933 if (!vec_stmt
) /* transformation not required. */
2935 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2936 if (dump_enabled_p ())
2937 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2939 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2940 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2941 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2942 vec_promote_demote
, stmt_info
, 0, vect_body
);
2949 if (dump_enabled_p ())
2950 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2953 scalar_dest
= gimple_call_lhs (stmt
);
2954 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2956 prev_stmt_info
= NULL
;
2957 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2959 tree prev_res
= NULL_TREE
;
2960 for (j
= 0; j
< ncopies
; ++j
)
2962 /* Build argument list for the vectorized call. */
2964 vargs
.create (nargs
);
2970 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2971 vec
<tree
> vec_oprnds0
;
2973 for (i
= 0; i
< nargs
; i
++)
2974 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2975 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
2976 vec_oprnds0
= vec_defs
[0];
2978 /* Arguments are ready. Create the new vector stmt. */
2979 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2982 for (k
= 0; k
< nargs
; k
++)
2984 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2985 vargs
[k
] = vec_oprndsk
[i
];
2987 if (modifier
== NARROW
)
2989 tree half_res
= make_ssa_name (vectype_in
);
2991 = gimple_build_call_internal_vec (ifn
, vargs
);
2992 gimple_call_set_lhs (call
, half_res
);
2993 gimple_call_set_nothrow (call
, true);
2995 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2998 prev_res
= half_res
;
3001 new_temp
= make_ssa_name (vec_dest
);
3002 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3003 prev_res
, half_res
);
3008 if (ifn
!= IFN_LAST
)
3009 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3011 call
= gimple_build_call_vec (fndecl
, vargs
);
3012 new_temp
= make_ssa_name (vec_dest
, call
);
3013 gimple_call_set_lhs (call
, new_temp
);
3014 gimple_call_set_nothrow (call
, true);
3017 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3018 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3021 for (i
= 0; i
< nargs
; i
++)
3023 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3024 vec_oprndsi
.release ();
3029 for (i
= 0; i
< nargs
; i
++)
3031 op
= gimple_call_arg (stmt
, i
);
3034 = vect_get_vec_def_for_operand (op
, stmt
);
3037 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
3039 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3042 vargs
.quick_push (vec_oprnd0
);
3045 if (gimple_call_internal_p (stmt
)
3046 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
3048 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3050 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3051 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3052 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
3053 new_temp
= make_ssa_name (vec_dest
);
3054 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3056 else if (modifier
== NARROW
)
3058 tree half_res
= make_ssa_name (vectype_in
);
3059 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3060 gimple_call_set_lhs (call
, half_res
);
3061 gimple_call_set_nothrow (call
, true);
3063 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3066 prev_res
= half_res
;
3069 new_temp
= make_ssa_name (vec_dest
);
3070 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3071 prev_res
, half_res
);
3076 if (ifn
!= IFN_LAST
)
3077 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3079 call
= gimple_build_call_vec (fndecl
, vargs
);
3080 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3081 gimple_call_set_lhs (call
, new_temp
);
3082 gimple_call_set_nothrow (call
, true);
3085 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3087 if (j
== (modifier
== NARROW
? 1 : 0))
3088 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3090 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3092 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3095 else if (modifier
== NARROW
)
3097 for (j
= 0; j
< ncopies
; ++j
)
3099 /* Build argument list for the vectorized call. */
3101 vargs
.create (nargs
* 2);
3107 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3108 vec
<tree
> vec_oprnds0
;
3110 for (i
= 0; i
< nargs
; i
++)
3111 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3112 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3113 vec_oprnds0
= vec_defs
[0];
3115 /* Arguments are ready. Create the new vector stmt. */
3116 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3120 for (k
= 0; k
< nargs
; k
++)
3122 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3123 vargs
.quick_push (vec_oprndsk
[i
]);
3124 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3127 if (ifn
!= IFN_LAST
)
3128 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3130 call
= gimple_build_call_vec (fndecl
, vargs
);
3131 new_temp
= make_ssa_name (vec_dest
, call
);
3132 gimple_call_set_lhs (call
, new_temp
);
3133 gimple_call_set_nothrow (call
, true);
3135 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3136 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3139 for (i
= 0; i
< nargs
; i
++)
3141 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3142 vec_oprndsi
.release ();
3147 for (i
= 0; i
< nargs
; i
++)
3149 op
= gimple_call_arg (stmt
, i
);
3153 = vect_get_vec_def_for_operand (op
, stmt
);
3155 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3159 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3161 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3163 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3166 vargs
.quick_push (vec_oprnd0
);
3167 vargs
.quick_push (vec_oprnd1
);
3170 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3171 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3172 gimple_call_set_lhs (new_stmt
, new_temp
);
3173 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3176 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3178 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3180 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3183 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3186 /* No current target implements this case. */
3191 /* The call in STMT might prevent it from being removed in dce.
3192 We however cannot remove it here, due to the way the ssa name
3193 it defines is mapped to the new definition. So just replace
3194 rhs of the statement with something harmless. */
3199 type
= TREE_TYPE (scalar_dest
);
3200 if (is_pattern_stmt_p (stmt_info
))
3201 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3203 lhs
= gimple_call_lhs (stmt
);
3205 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3206 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3207 set_vinfo_for_stmt (stmt
, NULL
);
3208 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3209 gsi_replace (gsi
, new_stmt
, false);
3215 struct simd_call_arg_info
3219 HOST_WIDE_INT linear_step
;
3220 enum vect_def_type dt
;
3222 bool simd_lane_linear
;
3225 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3226 is linear within simd lane (but not within whole loop), note it in
3230 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3231 struct simd_call_arg_info
*arginfo
)
3233 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3235 if (!is_gimple_assign (def_stmt
)
3236 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3237 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3240 tree base
= gimple_assign_rhs1 (def_stmt
);
3241 HOST_WIDE_INT linear_step
= 0;
3242 tree v
= gimple_assign_rhs2 (def_stmt
);
3243 while (TREE_CODE (v
) == SSA_NAME
)
3246 def_stmt
= SSA_NAME_DEF_STMT (v
);
3247 if (is_gimple_assign (def_stmt
))
3248 switch (gimple_assign_rhs_code (def_stmt
))
3251 t
= gimple_assign_rhs2 (def_stmt
);
3252 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3254 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3255 v
= gimple_assign_rhs1 (def_stmt
);
3258 t
= gimple_assign_rhs2 (def_stmt
);
3259 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3261 linear_step
= tree_to_shwi (t
);
3262 v
= gimple_assign_rhs1 (def_stmt
);
3265 t
= gimple_assign_rhs1 (def_stmt
);
3266 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3267 || (TYPE_PRECISION (TREE_TYPE (v
))
3268 < TYPE_PRECISION (TREE_TYPE (t
))))
3277 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3279 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3280 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3285 arginfo
->linear_step
= linear_step
;
3287 arginfo
->simd_lane_linear
= true;
3293 /* Return the number of elements in vector type VECTYPE, which is associated
3294 with a SIMD clone. At present these vectors always have a constant
3297 static unsigned HOST_WIDE_INT
3298 simd_clone_subparts (tree vectype
)
3300 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3303 /* Function vectorizable_simd_clone_call.
3305 Check if STMT performs a function call that can be vectorized
3306 by calling a simd clone of the function.
3307 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3308 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3309 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3312 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3313 gimple
**vec_stmt
, slp_tree slp_node
)
3318 tree vec_oprnd0
= NULL_TREE
;
3319 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3321 unsigned int nunits
;
3322 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3323 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3324 vec_info
*vinfo
= stmt_info
->vinfo
;
3325 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3326 tree fndecl
, new_temp
;
3328 gimple
*new_stmt
= NULL
;
3330 auto_vec
<simd_call_arg_info
> arginfo
;
3331 vec
<tree
> vargs
= vNULL
;
3333 tree lhs
, rtype
, ratype
;
3334 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3336 /* Is STMT a vectorizable call? */
3337 if (!is_gimple_call (stmt
))
3340 fndecl
= gimple_call_fndecl (stmt
);
3341 if (fndecl
== NULL_TREE
)
3344 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3345 if (node
== NULL
|| node
->simd_clones
== NULL
)
3348 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3351 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3355 if (gimple_call_lhs (stmt
)
3356 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3359 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3361 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3363 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3370 /* Process function arguments. */
3371 nargs
= gimple_call_num_args (stmt
);
3373 /* Bail out if the function has zero arguments. */
3377 arginfo
.reserve (nargs
, true);
3379 for (i
= 0; i
< nargs
; i
++)
3381 simd_call_arg_info thisarginfo
;
3384 thisarginfo
.linear_step
= 0;
3385 thisarginfo
.align
= 0;
3386 thisarginfo
.op
= NULL_TREE
;
3387 thisarginfo
.simd_lane_linear
= false;
3389 op
= gimple_call_arg (stmt
, i
);
3390 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3391 &thisarginfo
.vectype
)
3392 || thisarginfo
.dt
== vect_uninitialized_def
)
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3396 "use not simple.\n");
3400 if (thisarginfo
.dt
== vect_constant_def
3401 || thisarginfo
.dt
== vect_external_def
)
3402 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3404 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3406 /* For linear arguments, the analyze phase should have saved
3407 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3408 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3409 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3411 gcc_assert (vec_stmt
);
3412 thisarginfo
.linear_step
3413 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3415 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3416 thisarginfo
.simd_lane_linear
3417 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3418 == boolean_true_node
);
3419 /* If loop has been peeled for alignment, we need to adjust it. */
3420 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3421 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3422 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3424 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3425 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3426 tree opt
= TREE_TYPE (thisarginfo
.op
);
3427 bias
= fold_convert (TREE_TYPE (step
), bias
);
3428 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3430 = fold_build2 (POINTER_TYPE_P (opt
)
3431 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3432 thisarginfo
.op
, bias
);
3436 && thisarginfo
.dt
!= vect_constant_def
3437 && thisarginfo
.dt
!= vect_external_def
3439 && TREE_CODE (op
) == SSA_NAME
3440 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3442 && tree_fits_shwi_p (iv
.step
))
3444 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3445 thisarginfo
.op
= iv
.base
;
3447 else if ((thisarginfo
.dt
== vect_constant_def
3448 || thisarginfo
.dt
== vect_external_def
)
3449 && POINTER_TYPE_P (TREE_TYPE (op
)))
3450 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3451 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3453 if (POINTER_TYPE_P (TREE_TYPE (op
))
3454 && !thisarginfo
.linear_step
3456 && thisarginfo
.dt
!= vect_constant_def
3457 && thisarginfo
.dt
!= vect_external_def
3460 && TREE_CODE (op
) == SSA_NAME
)
3461 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3463 arginfo
.quick_push (thisarginfo
);
3466 unsigned HOST_WIDE_INT vf
;
3467 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3469 if (dump_enabled_p ())
3470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3471 "not considering SIMD clones; not yet supported"
3472 " for variable-width vectors.\n");
3476 unsigned int badness
= 0;
3477 struct cgraph_node
*bestn
= NULL
;
3478 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3479 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3481 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3482 n
= n
->simdclone
->next_clone
)
3484 unsigned int this_badness
= 0;
3485 if (n
->simdclone
->simdlen
> vf
3486 || n
->simdclone
->nargs
!= nargs
)
3488 if (n
->simdclone
->simdlen
< vf
)
3489 this_badness
+= (exact_log2 (vf
)
3490 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3491 if (n
->simdclone
->inbranch
)
3492 this_badness
+= 2048;
3493 int target_badness
= targetm
.simd_clone
.usable (n
);
3494 if (target_badness
< 0)
3496 this_badness
+= target_badness
* 512;
3497 /* FORNOW: Have to add code to add the mask argument. */
3498 if (n
->simdclone
->inbranch
)
3500 for (i
= 0; i
< nargs
; i
++)
3502 switch (n
->simdclone
->args
[i
].arg_type
)
3504 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3505 if (!useless_type_conversion_p
3506 (n
->simdclone
->args
[i
].orig_type
,
3507 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3509 else if (arginfo
[i
].dt
== vect_constant_def
3510 || arginfo
[i
].dt
== vect_external_def
3511 || arginfo
[i
].linear_step
)
3514 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3515 if (arginfo
[i
].dt
!= vect_constant_def
3516 && arginfo
[i
].dt
!= vect_external_def
)
3519 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3520 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3521 if (arginfo
[i
].dt
== vect_constant_def
3522 || arginfo
[i
].dt
== vect_external_def
3523 || (arginfo
[i
].linear_step
3524 != n
->simdclone
->args
[i
].linear_step
))
3527 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3528 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3529 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3530 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3531 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3532 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3536 case SIMD_CLONE_ARG_TYPE_MASK
:
3539 if (i
== (size_t) -1)
3541 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3546 if (arginfo
[i
].align
)
3547 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3548 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3550 if (i
== (size_t) -1)
3552 if (bestn
== NULL
|| this_badness
< badness
)
3555 badness
= this_badness
;
3562 for (i
= 0; i
< nargs
; i
++)
3563 if ((arginfo
[i
].dt
== vect_constant_def
3564 || arginfo
[i
].dt
== vect_external_def
)
3565 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3568 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3570 if (arginfo
[i
].vectype
== NULL
3571 || (simd_clone_subparts (arginfo
[i
].vectype
)
3572 > bestn
->simdclone
->simdlen
))
3576 fndecl
= bestn
->decl
;
3577 nunits
= bestn
->simdclone
->simdlen
;
3578 ncopies
= vf
/ nunits
;
3580 /* If the function isn't const, only allow it in simd loops where user
3581 has asserted that at least nunits consecutive iterations can be
3582 performed using SIMD instructions. */
3583 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3584 && gimple_vuse (stmt
))
3587 /* Sanity check: make sure that at least one copy of the vectorized stmt
3588 needs to be generated. */
3589 gcc_assert (ncopies
>= 1);
3591 if (!vec_stmt
) /* transformation not required. */
3593 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3594 for (i
= 0; i
< nargs
; i
++)
3595 if ((bestn
->simdclone
->args
[i
].arg_type
3596 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3597 || (bestn
->simdclone
->args
[i
].arg_type
3598 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3600 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3602 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3603 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3604 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3605 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3606 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3607 tree sll
= arginfo
[i
].simd_lane_linear
3608 ? boolean_true_node
: boolean_false_node
;
3609 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3611 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3612 if (dump_enabled_p ())
3613 dump_printf_loc (MSG_NOTE
, vect_location
,
3614 "=== vectorizable_simd_clone_call ===\n");
3615 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3621 if (dump_enabled_p ())
3622 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3625 scalar_dest
= gimple_call_lhs (stmt
);
3626 vec_dest
= NULL_TREE
;
3631 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3632 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3633 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3636 rtype
= TREE_TYPE (ratype
);
3640 prev_stmt_info
= NULL
;
3641 for (j
= 0; j
< ncopies
; ++j
)
3643 /* Build argument list for the vectorized call. */
3645 vargs
.create (nargs
);
3649 for (i
= 0; i
< nargs
; i
++)
3651 unsigned int k
, l
, m
, o
;
3653 op
= gimple_call_arg (stmt
, i
);
3654 switch (bestn
->simdclone
->args
[i
].arg_type
)
3656 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3657 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3658 o
= nunits
/ simd_clone_subparts (atype
);
3659 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3661 if (simd_clone_subparts (atype
)
3662 < simd_clone_subparts (arginfo
[i
].vectype
))
3664 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3665 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
3666 / simd_clone_subparts (atype
));
3667 gcc_assert ((k
& (k
- 1)) == 0);
3670 = vect_get_vec_def_for_operand (op
, stmt
);
3673 vec_oprnd0
= arginfo
[i
].op
;
3674 if ((m
& (k
- 1)) == 0)
3676 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3679 arginfo
[i
].op
= vec_oprnd0
;
3681 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3683 bitsize_int ((m
& (k
- 1)) * prec
));
3685 = gimple_build_assign (make_ssa_name (atype
),
3687 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3688 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3692 k
= (simd_clone_subparts (atype
)
3693 / simd_clone_subparts (arginfo
[i
].vectype
));
3694 gcc_assert ((k
& (k
- 1)) == 0);
3695 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3697 vec_alloc (ctor_elts
, k
);
3700 for (l
= 0; l
< k
; l
++)
3702 if (m
== 0 && l
== 0)
3704 = vect_get_vec_def_for_operand (op
, stmt
);
3707 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3709 arginfo
[i
].op
= vec_oprnd0
;
3712 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3716 vargs
.safe_push (vec_oprnd0
);
3719 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3721 = gimple_build_assign (make_ssa_name (atype
),
3723 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3724 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3729 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3730 vargs
.safe_push (op
);
3732 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3733 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3738 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3743 edge pe
= loop_preheader_edge (loop
);
3744 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3745 gcc_assert (!new_bb
);
3747 if (arginfo
[i
].simd_lane_linear
)
3749 vargs
.safe_push (arginfo
[i
].op
);
3752 tree phi_res
= copy_ssa_name (op
);
3753 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3754 set_vinfo_for_stmt (new_phi
,
3755 new_stmt_vec_info (new_phi
, loop_vinfo
));
3756 add_phi_arg (new_phi
, arginfo
[i
].op
,
3757 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3759 = POINTER_TYPE_P (TREE_TYPE (op
))
3760 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3761 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3762 ? sizetype
: TREE_TYPE (op
);
3764 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3766 tree tcst
= wide_int_to_tree (type
, cst
);
3767 tree phi_arg
= copy_ssa_name (op
);
3769 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3770 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3771 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3772 set_vinfo_for_stmt (new_stmt
,
3773 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3774 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3776 arginfo
[i
].op
= phi_res
;
3777 vargs
.safe_push (phi_res
);
3782 = POINTER_TYPE_P (TREE_TYPE (op
))
3783 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3784 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3785 ? sizetype
: TREE_TYPE (op
);
3787 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3789 tree tcst
= wide_int_to_tree (type
, cst
);
3790 new_temp
= make_ssa_name (TREE_TYPE (op
));
3791 new_stmt
= gimple_build_assign (new_temp
, code
,
3792 arginfo
[i
].op
, tcst
);
3793 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3794 vargs
.safe_push (new_temp
);
3797 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3798 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3799 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3800 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3801 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3802 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3808 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3811 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
3813 new_temp
= create_tmp_var (ratype
);
3814 else if (simd_clone_subparts (vectype
)
3815 == simd_clone_subparts (rtype
))
3816 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3818 new_temp
= make_ssa_name (rtype
, new_stmt
);
3819 gimple_call_set_lhs (new_stmt
, new_temp
);
3821 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3825 if (simd_clone_subparts (vectype
) < nunits
)
3828 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3829 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
3830 k
= nunits
/ simd_clone_subparts (vectype
);
3831 gcc_assert ((k
& (k
- 1)) == 0);
3832 for (l
= 0; l
< k
; l
++)
3837 t
= build_fold_addr_expr (new_temp
);
3838 t
= build2 (MEM_REF
, vectype
, t
,
3839 build_int_cst (TREE_TYPE (t
), l
* bytes
));
3842 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3843 bitsize_int (prec
), bitsize_int (l
* prec
));
3845 = gimple_build_assign (make_ssa_name (vectype
), t
);
3846 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3847 if (j
== 0 && l
== 0)
3848 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3850 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3852 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3857 tree clobber
= build_constructor (ratype
, NULL
);
3858 TREE_THIS_VOLATILE (clobber
) = 1;
3859 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3860 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3864 else if (simd_clone_subparts (vectype
) > nunits
)
3866 unsigned int k
= (simd_clone_subparts (vectype
)
3867 / simd_clone_subparts (rtype
));
3868 gcc_assert ((k
& (k
- 1)) == 0);
3869 if ((j
& (k
- 1)) == 0)
3870 vec_alloc (ret_ctor_elts
, k
);
3873 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
3874 for (m
= 0; m
< o
; m
++)
3876 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3877 size_int (m
), NULL_TREE
, NULL_TREE
);
3879 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3880 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3881 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3882 gimple_assign_lhs (new_stmt
));
3884 tree clobber
= build_constructor (ratype
, NULL
);
3885 TREE_THIS_VOLATILE (clobber
) = 1;
3886 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3887 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3890 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3891 if ((j
& (k
- 1)) != k
- 1)
3893 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3895 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3896 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3898 if ((unsigned) j
== k
- 1)
3899 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3901 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3903 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3908 tree t
= build_fold_addr_expr (new_temp
);
3909 t
= build2 (MEM_REF
, vectype
, t
,
3910 build_int_cst (TREE_TYPE (t
), 0));
3912 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3913 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3914 tree clobber
= build_constructor (ratype
, NULL
);
3915 TREE_THIS_VOLATILE (clobber
) = 1;
3916 vect_finish_stmt_generation (stmt
,
3917 gimple_build_assign (new_temp
,
3923 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3925 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3927 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3932 /* The call in STMT might prevent it from being removed in dce.
3933 We however cannot remove it here, due to the way the ssa name
3934 it defines is mapped to the new definition. So just replace
3935 rhs of the statement with something harmless. */
3942 type
= TREE_TYPE (scalar_dest
);
3943 if (is_pattern_stmt_p (stmt_info
))
3944 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3946 lhs
= gimple_call_lhs (stmt
);
3947 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3950 new_stmt
= gimple_build_nop ();
3951 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3952 set_vinfo_for_stmt (stmt
, NULL
);
3953 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3954 gsi_replace (gsi
, new_stmt
, true);
3955 unlink_stmt_vdef (stmt
);
3961 /* Function vect_gen_widened_results_half
3963 Create a vector stmt whose code, type, number of arguments, and result
3964 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3965 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3966 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3967 needs to be created (DECL is a function-decl of a target-builtin).
3968 STMT is the original scalar stmt that we are vectorizing. */
3971 vect_gen_widened_results_half (enum tree_code code
,
3973 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3974 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3980 /* Generate half of the widened result: */
3981 if (code
== CALL_EXPR
)
3983 /* Target specific support */
3984 if (op_type
== binary_op
)
3985 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3987 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3988 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3989 gimple_call_set_lhs (new_stmt
, new_temp
);
3993 /* Generic support */
3994 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3995 if (op_type
!= binary_op
)
3997 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3998 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3999 gimple_assign_set_lhs (new_stmt
, new_temp
);
4001 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4007 /* Get vectorized definitions for loop-based vectorization. For the first
4008 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4009 scalar operand), and for the rest we get a copy with
4010 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4011 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4012 The vectors are collected into VEC_OPRNDS. */
4015 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
4016 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4020 /* Get first vector operand. */
4021 /* All the vector operands except the very first one (that is scalar oprnd)
4023 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4024 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
4026 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
4028 vec_oprnds
->quick_push (vec_oprnd
);
4030 /* Get second vector operand. */
4031 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
4032 vec_oprnds
->quick_push (vec_oprnd
);
4036 /* For conversion in multiple steps, continue to get operands
4039 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
4043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4044 For multi-step conversions store the resulting vectors and call the function
4048 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4049 int multi_step_cvt
, gimple
*stmt
,
4051 gimple_stmt_iterator
*gsi
,
4052 slp_tree slp_node
, enum tree_code code
,
4053 stmt_vec_info
*prev_stmt_info
)
4056 tree vop0
, vop1
, new_tmp
, vec_dest
;
4058 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4060 vec_dest
= vec_dsts
.pop ();
4062 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4064 /* Create demotion operation. */
4065 vop0
= (*vec_oprnds
)[i
];
4066 vop1
= (*vec_oprnds
)[i
+ 1];
4067 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4068 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4069 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4070 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4073 /* Store the resulting vector for next recursive call. */
4074 (*vec_oprnds
)[i
/2] = new_tmp
;
4077 /* This is the last step of the conversion sequence. Store the
4078 vectors in SLP_NODE or in vector info of the scalar statement
4079 (or in STMT_VINFO_RELATED_STMT chain). */
4081 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4084 if (!*prev_stmt_info
)
4085 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4087 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
4089 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4094 /* For multi-step demotion operations we first generate demotion operations
4095 from the source type to the intermediate types, and then combine the
4096 results (stored in VEC_OPRNDS) in demotion operation to the destination
4100 /* At each level of recursion we have half of the operands we had at the
4102 vec_oprnds
->truncate ((i
+1)/2);
4103 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4104 stmt
, vec_dsts
, gsi
, slp_node
,
4105 VEC_PACK_TRUNC_EXPR
,
4109 vec_dsts
.quick_push (vec_dest
);
4113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4114 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4115 the resulting vectors and call the function recursively. */
4118 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4119 vec
<tree
> *vec_oprnds1
,
4120 gimple
*stmt
, tree vec_dest
,
4121 gimple_stmt_iterator
*gsi
,
4122 enum tree_code code1
,
4123 enum tree_code code2
, tree decl1
,
4124 tree decl2
, int op_type
)
4127 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4128 gimple
*new_stmt1
, *new_stmt2
;
4129 vec
<tree
> vec_tmp
= vNULL
;
4131 vec_tmp
.create (vec_oprnds0
->length () * 2);
4132 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4134 if (op_type
== binary_op
)
4135 vop1
= (*vec_oprnds1
)[i
];
4139 /* Generate the two halves of promotion operation. */
4140 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4141 op_type
, vec_dest
, gsi
, stmt
);
4142 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4143 op_type
, vec_dest
, gsi
, stmt
);
4144 if (is_gimple_call (new_stmt1
))
4146 new_tmp1
= gimple_call_lhs (new_stmt1
);
4147 new_tmp2
= gimple_call_lhs (new_stmt2
);
4151 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4152 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4155 /* Store the results for the next step. */
4156 vec_tmp
.quick_push (new_tmp1
);
4157 vec_tmp
.quick_push (new_tmp2
);
4160 vec_oprnds0
->release ();
4161 *vec_oprnds0
= vec_tmp
;
4165 /* Check if STMT performs a conversion operation, that can be vectorized.
4166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4167 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4171 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4172 gimple
**vec_stmt
, slp_tree slp_node
)
4176 tree op0
, op1
= NULL_TREE
;
4177 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4178 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4179 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4180 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4181 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4182 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4185 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4187 gimple
*new_stmt
= NULL
;
4188 stmt_vec_info prev_stmt_info
;
4189 poly_uint64 nunits_in
;
4190 poly_uint64 nunits_out
;
4191 tree vectype_out
, vectype_in
;
4193 tree lhs_type
, rhs_type
;
4194 enum { NARROW
, NONE
, WIDEN
} modifier
;
4195 vec
<tree
> vec_oprnds0
= vNULL
;
4196 vec
<tree
> vec_oprnds1
= vNULL
;
4198 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4199 vec_info
*vinfo
= stmt_info
->vinfo
;
4200 int multi_step_cvt
= 0;
4201 vec
<tree
> interm_types
= vNULL
;
4202 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4204 unsigned short fltsz
;
4206 /* Is STMT a vectorizable conversion? */
4208 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4211 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4215 if (!is_gimple_assign (stmt
))
4218 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4221 code
= gimple_assign_rhs_code (stmt
);
4222 if (!CONVERT_EXPR_CODE_P (code
)
4223 && code
!= FIX_TRUNC_EXPR
4224 && code
!= FLOAT_EXPR
4225 && code
!= WIDEN_MULT_EXPR
4226 && code
!= WIDEN_LSHIFT_EXPR
)
4229 op_type
= TREE_CODE_LENGTH (code
);
4231 /* Check types of lhs and rhs. */
4232 scalar_dest
= gimple_assign_lhs (stmt
);
4233 lhs_type
= TREE_TYPE (scalar_dest
);
4234 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4236 op0
= gimple_assign_rhs1 (stmt
);
4237 rhs_type
= TREE_TYPE (op0
);
4239 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4240 && !((INTEGRAL_TYPE_P (lhs_type
)
4241 && INTEGRAL_TYPE_P (rhs_type
))
4242 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4243 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4246 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4247 && ((INTEGRAL_TYPE_P (lhs_type
)
4248 && !type_has_mode_precision_p (lhs_type
))
4249 || (INTEGRAL_TYPE_P (rhs_type
)
4250 && !type_has_mode_precision_p (rhs_type
))))
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4254 "type conversion to/from bit-precision unsupported."
4259 /* Check the operands of the operation. */
4260 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4262 if (dump_enabled_p ())
4263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4264 "use not simple.\n");
4267 if (op_type
== binary_op
)
4271 op1
= gimple_assign_rhs2 (stmt
);
4272 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4273 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4275 if (CONSTANT_CLASS_P (op0
))
4276 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4278 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4282 if (dump_enabled_p ())
4283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4284 "use not simple.\n");
4289 /* If op0 is an external or constant defs use a vector type of
4290 the same size as the output vector type. */
4292 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4294 gcc_assert (vectype_in
);
4297 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4300 "no vectype for scalar type ");
4301 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4302 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4308 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4309 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4311 if (dump_enabled_p ())
4313 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4314 "can't convert between boolean and non "
4316 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4317 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4323 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4324 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4325 if (known_eq (nunits_out
, nunits_in
))
4327 else if (multiple_p (nunits_out
, nunits_in
))
4331 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4335 /* Multiple types in SLP are handled by creating the appropriate number of
4336 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4340 else if (modifier
== NARROW
)
4341 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4343 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4345 /* Sanity check: make sure that at least one copy of the vectorized stmt
4346 needs to be generated. */
4347 gcc_assert (ncopies
>= 1);
4349 bool found_mode
= false;
4350 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4351 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4352 opt_scalar_mode rhs_mode_iter
;
4354 /* Supportable by target? */
4358 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4360 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4365 if (dump_enabled_p ())
4366 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4367 "conversion not supported by target.\n");
4371 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4372 &code1
, &code2
, &multi_step_cvt
,
4375 /* Binary widening operation can only be supported directly by the
4377 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4381 if (code
!= FLOAT_EXPR
4382 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4385 fltsz
= GET_MODE_SIZE (lhs_mode
);
4386 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4388 rhs_mode
= rhs_mode_iter
.require ();
4389 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4393 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4394 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4395 if (cvt_type
== NULL_TREE
)
4398 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4400 if (!supportable_convert_operation (code
, vectype_out
,
4401 cvt_type
, &decl1
, &codecvt1
))
4404 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4405 cvt_type
, &codecvt1
,
4406 &codecvt2
, &multi_step_cvt
,
4410 gcc_assert (multi_step_cvt
== 0);
4412 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4413 vectype_in
, &code1
, &code2
,
4414 &multi_step_cvt
, &interm_types
))
4424 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4425 codecvt2
= ERROR_MARK
;
4429 interm_types
.safe_push (cvt_type
);
4430 cvt_type
= NULL_TREE
;
4435 gcc_assert (op_type
== unary_op
);
4436 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4437 &code1
, &multi_step_cvt
,
4441 if (code
!= FIX_TRUNC_EXPR
4442 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4446 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4447 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4448 if (cvt_type
== NULL_TREE
)
4450 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4453 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4454 &code1
, &multi_step_cvt
,
4463 if (!vec_stmt
) /* transformation not required. */
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_NOTE
, vect_location
,
4467 "=== vectorizable_conversion ===\n");
4468 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4470 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4471 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4473 else if (modifier
== NARROW
)
4475 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4476 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4480 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4481 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4483 interm_types
.release ();
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_NOTE
, vect_location
,
4490 "transform conversion. ncopies = %d.\n", ncopies
);
4492 if (op_type
== binary_op
)
4494 if (CONSTANT_CLASS_P (op0
))
4495 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4496 else if (CONSTANT_CLASS_P (op1
))
4497 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4500 /* In case of multi-step conversion, we first generate conversion operations
4501 to the intermediate types, and then from that types to the final one.
4502 We create vector destinations for the intermediate type (TYPES) received
4503 from supportable_*_operation, and store them in the correct order
4504 for future use in vect_create_vectorized_*_stmts (). */
4505 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4506 vec_dest
= vect_create_destination_var (scalar_dest
,
4507 (cvt_type
&& modifier
== WIDEN
)
4508 ? cvt_type
: vectype_out
);
4509 vec_dsts
.quick_push (vec_dest
);
4513 for (i
= interm_types
.length () - 1;
4514 interm_types
.iterate (i
, &intermediate_type
); i
--)
4516 vec_dest
= vect_create_destination_var (scalar_dest
,
4518 vec_dsts
.quick_push (vec_dest
);
4523 vec_dest
= vect_create_destination_var (scalar_dest
,
4525 ? vectype_out
: cvt_type
);
4529 if (modifier
== WIDEN
)
4531 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4532 if (op_type
== binary_op
)
4533 vec_oprnds1
.create (1);
4535 else if (modifier
== NARROW
)
4536 vec_oprnds0
.create (
4537 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4539 else if (code
== WIDEN_LSHIFT_EXPR
)
4540 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4543 prev_stmt_info
= NULL
;
4547 for (j
= 0; j
< ncopies
; j
++)
4550 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4552 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4554 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4556 /* Arguments are ready, create the new vector stmt. */
4557 if (code1
== CALL_EXPR
)
4559 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4560 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4561 gimple_call_set_lhs (new_stmt
, new_temp
);
4565 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4566 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4567 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4568 gimple_assign_set_lhs (new_stmt
, new_temp
);
4571 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4573 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4576 if (!prev_stmt_info
)
4577 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4579 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4580 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4587 /* In case the vectorization factor (VF) is bigger than the number
4588 of elements that we can fit in a vectype (nunits), we have to
4589 generate more than one vector stmt - i.e - we need to "unroll"
4590 the vector stmt by a factor VF/nunits. */
4591 for (j
= 0; j
< ncopies
; j
++)
4598 if (code
== WIDEN_LSHIFT_EXPR
)
4603 /* Store vec_oprnd1 for every vector stmt to be created
4604 for SLP_NODE. We check during the analysis that all
4605 the shift arguments are the same. */
4606 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4607 vec_oprnds1
.quick_push (vec_oprnd1
);
4609 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4613 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4614 &vec_oprnds1
, slp_node
);
4618 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4619 vec_oprnds0
.quick_push (vec_oprnd0
);
4620 if (op_type
== binary_op
)
4622 if (code
== WIDEN_LSHIFT_EXPR
)
4625 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4626 vec_oprnds1
.quick_push (vec_oprnd1
);
4632 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4633 vec_oprnds0
.truncate (0);
4634 vec_oprnds0
.quick_push (vec_oprnd0
);
4635 if (op_type
== binary_op
)
4637 if (code
== WIDEN_LSHIFT_EXPR
)
4640 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4642 vec_oprnds1
.truncate (0);
4643 vec_oprnds1
.quick_push (vec_oprnd1
);
4647 /* Arguments are ready. Create the new vector stmts. */
4648 for (i
= multi_step_cvt
; i
>= 0; i
--)
4650 tree this_dest
= vec_dsts
[i
];
4651 enum tree_code c1
= code1
, c2
= code2
;
4652 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4657 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4659 stmt
, this_dest
, gsi
,
4660 c1
, c2
, decl1
, decl2
,
4664 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4668 if (codecvt1
== CALL_EXPR
)
4670 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4671 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4672 gimple_call_set_lhs (new_stmt
, new_temp
);
4676 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4677 new_temp
= make_ssa_name (vec_dest
);
4678 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4682 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4685 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4688 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4691 if (!prev_stmt_info
)
4692 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4694 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4695 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4700 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4704 /* In case the vectorization factor (VF) is bigger than the number
4705 of elements that we can fit in a vectype (nunits), we have to
4706 generate more than one vector stmt - i.e - we need to "unroll"
4707 the vector stmt by a factor VF/nunits. */
4708 for (j
= 0; j
< ncopies
; j
++)
4712 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4716 vec_oprnds0
.truncate (0);
4717 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4718 vect_pow2 (multi_step_cvt
) - 1);
4721 /* Arguments are ready. Create the new vector stmts. */
4723 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4725 if (codecvt1
== CALL_EXPR
)
4727 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4728 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4729 gimple_call_set_lhs (new_stmt
, new_temp
);
4733 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4734 new_temp
= make_ssa_name (vec_dest
);
4735 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4739 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4740 vec_oprnds0
[i
] = new_temp
;
4743 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4744 stmt
, vec_dsts
, gsi
,
4749 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4753 vec_oprnds0
.release ();
4754 vec_oprnds1
.release ();
4755 interm_types
.release ();
4761 /* Function vectorizable_assignment.
4763 Check if STMT performs an assignment (copy) that can be vectorized.
4764 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4765 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4766 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4769 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4770 gimple
**vec_stmt
, slp_tree slp_node
)
4775 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4776 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4779 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4783 vec
<tree
> vec_oprnds
= vNULL
;
4785 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4786 vec_info
*vinfo
= stmt_info
->vinfo
;
4787 gimple
*new_stmt
= NULL
;
4788 stmt_vec_info prev_stmt_info
= NULL
;
4789 enum tree_code code
;
4792 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4795 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4799 /* Is vectorizable assignment? */
4800 if (!is_gimple_assign (stmt
))
4803 scalar_dest
= gimple_assign_lhs (stmt
);
4804 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4807 code
= gimple_assign_rhs_code (stmt
);
4808 if (gimple_assign_single_p (stmt
)
4809 || code
== PAREN_EXPR
4810 || CONVERT_EXPR_CODE_P (code
))
4811 op
= gimple_assign_rhs1 (stmt
);
4815 if (code
== VIEW_CONVERT_EXPR
)
4816 op
= TREE_OPERAND (op
, 0);
4818 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4819 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4821 /* Multiple types in SLP are handled by creating the appropriate number of
4822 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4827 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4829 gcc_assert (ncopies
>= 1);
4831 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4835 "use not simple.\n");
4839 /* We can handle NOP_EXPR conversions that do not change the number
4840 of elements or the vector size. */
4841 if ((CONVERT_EXPR_CODE_P (code
)
4842 || code
== VIEW_CONVERT_EXPR
)
4844 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
4845 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
4846 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4849 /* We do not handle bit-precision changes. */
4850 if ((CONVERT_EXPR_CODE_P (code
)
4851 || code
== VIEW_CONVERT_EXPR
)
4852 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4853 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
4854 || !type_has_mode_precision_p (TREE_TYPE (op
)))
4855 /* But a conversion that does not change the bit-pattern is ok. */
4856 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4857 > TYPE_PRECISION (TREE_TYPE (op
)))
4858 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4859 /* Conversion between boolean types of different sizes is
4860 a simple assignment in case their vectypes are same
4862 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4863 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4865 if (dump_enabled_p ())
4866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4867 "type conversion to/from bit-precision "
4872 if (!vec_stmt
) /* transformation not required. */
4874 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_NOTE
, vect_location
,
4877 "=== vectorizable_assignment ===\n");
4878 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4887 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4890 for (j
= 0; j
< ncopies
; j
++)
4894 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
4896 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4898 /* Arguments are ready. create the new vector stmt. */
4899 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4901 if (CONVERT_EXPR_CODE_P (code
)
4902 || code
== VIEW_CONVERT_EXPR
)
4903 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4904 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4905 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4906 gimple_assign_set_lhs (new_stmt
, new_temp
);
4907 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4909 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4916 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4918 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4920 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4923 vec_oprnds
.release ();
4928 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4929 either as shift by a scalar or by a vector. */
4932 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4935 machine_mode vec_mode
;
4940 vectype
= get_vectype_for_scalar_type (scalar_type
);
4944 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4946 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4948 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4950 || (optab_handler (optab
, TYPE_MODE (vectype
))
4951 == CODE_FOR_nothing
))
4955 vec_mode
= TYPE_MODE (vectype
);
4956 icode
= (int) optab_handler (optab
, vec_mode
);
4957 if (icode
== CODE_FOR_nothing
)
4964 /* Function vectorizable_shift.
4966 Check if STMT performs a shift operation that can be vectorized.
4967 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4968 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4969 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4972 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4973 gimple
**vec_stmt
, slp_tree slp_node
)
4977 tree op0
, op1
= NULL
;
4978 tree vec_oprnd1
= NULL_TREE
;
4979 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4981 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4982 enum tree_code code
;
4983 machine_mode vec_mode
;
4987 machine_mode optab_op2_mode
;
4989 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4991 gimple
*new_stmt
= NULL
;
4992 stmt_vec_info prev_stmt_info
;
4993 poly_uint64 nunits_in
;
4994 poly_uint64 nunits_out
;
4999 vec
<tree
> vec_oprnds0
= vNULL
;
5000 vec
<tree
> vec_oprnds1
= vNULL
;
5003 bool scalar_shift_arg
= true;
5004 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5005 vec_info
*vinfo
= stmt_info
->vinfo
;
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5010 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5014 /* Is STMT a vectorizable binary/unary operation? */
5015 if (!is_gimple_assign (stmt
))
5018 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5021 code
= gimple_assign_rhs_code (stmt
);
5023 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5024 || code
== RROTATE_EXPR
))
5027 scalar_dest
= gimple_assign_lhs (stmt
);
5028 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5029 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5031 if (dump_enabled_p ())
5032 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5033 "bit-precision shifts not supported.\n");
5037 op0
= gimple_assign_rhs1 (stmt
);
5038 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5042 "use not simple.\n");
5045 /* If op0 is an external or constant def use a vector type with
5046 the same size as the output vector type. */
5048 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5050 gcc_assert (vectype
);
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5055 "no vectype for scalar type\n");
5059 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5060 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5061 if (maybe_ne (nunits_out
, nunits_in
))
5064 op1
= gimple_assign_rhs2 (stmt
);
5065 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5069 "use not simple.\n");
5073 /* Multiple types in SLP are handled by creating the appropriate number of
5074 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5079 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5081 gcc_assert (ncopies
>= 1);
5083 /* Determine whether the shift amount is a vector, or scalar. If the
5084 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5086 if ((dt
[1] == vect_internal_def
5087 || dt
[1] == vect_induction_def
)
5089 scalar_shift_arg
= false;
5090 else if (dt
[1] == vect_constant_def
5091 || dt
[1] == vect_external_def
5092 || dt
[1] == vect_internal_def
)
5094 /* In SLP, need to check whether the shift count is the same,
5095 in loops if it is a constant or invariant, it is always
5099 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5102 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
5103 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5104 scalar_shift_arg
= false;
5107 /* If the shift amount is computed by a pattern stmt we cannot
5108 use the scalar amount directly thus give up and use a vector
5110 if (dt
[1] == vect_internal_def
)
5112 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
5113 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
5114 scalar_shift_arg
= false;
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5121 "operand mode requires invariant argument.\n");
5125 /* Vector shifted by vector. */
5126 if (!scalar_shift_arg
)
5128 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_NOTE
, vect_location
,
5131 "vector/vector shift/rotate found.\n");
5134 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5135 if (op1_vectype
== NULL_TREE
5136 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5138 if (dump_enabled_p ())
5139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5140 "unusable type for last operand in"
5141 " vector/vector shift/rotate.\n");
5145 /* See if the machine has a vector shifted by scalar insn and if not
5146 then see if it has a vector shifted by vector insn. */
5149 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5151 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5153 if (dump_enabled_p ())
5154 dump_printf_loc (MSG_NOTE
, vect_location
,
5155 "vector/scalar shift/rotate found.\n");
5159 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5161 && (optab_handler (optab
, TYPE_MODE (vectype
))
5162 != CODE_FOR_nothing
))
5164 scalar_shift_arg
= false;
5166 if (dump_enabled_p ())
5167 dump_printf_loc (MSG_NOTE
, vect_location
,
5168 "vector/vector shift/rotate found.\n");
5170 /* Unlike the other binary operators, shifts/rotates have
5171 the rhs being int, instead of the same type as the lhs,
5172 so make sure the scalar is the right type if we are
5173 dealing with vectors of long long/long/short/char. */
5174 if (dt
[1] == vect_constant_def
)
5175 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5176 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5180 && TYPE_MODE (TREE_TYPE (vectype
))
5181 != TYPE_MODE (TREE_TYPE (op1
)))
5183 if (dump_enabled_p ())
5184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5185 "unusable type for last operand in"
5186 " vector/vector shift/rotate.\n");
5189 if (vec_stmt
&& !slp_node
)
5191 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5192 op1
= vect_init_vector (stmt
, op1
,
5193 TREE_TYPE (vectype
), NULL
);
5200 /* Supportable by target? */
5203 if (dump_enabled_p ())
5204 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5208 vec_mode
= TYPE_MODE (vectype
);
5209 icode
= (int) optab_handler (optab
, vec_mode
);
5210 if (icode
== CODE_FOR_nothing
)
5212 if (dump_enabled_p ())
5213 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5214 "op not supported by target.\n");
5215 /* Check only during analysis. */
5216 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5218 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5220 if (dump_enabled_p ())
5221 dump_printf_loc (MSG_NOTE
, vect_location
,
5222 "proceeding using word mode.\n");
5225 /* Worthwhile without SIMD support? Check only during analysis. */
5227 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5228 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5230 if (dump_enabled_p ())
5231 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5232 "not worthwhile without SIMD support.\n");
5236 if (!vec_stmt
) /* transformation not required. */
5238 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_NOTE
, vect_location
,
5241 "=== vectorizable_shift ===\n");
5242 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5248 if (dump_enabled_p ())
5249 dump_printf_loc (MSG_NOTE
, vect_location
,
5250 "transform binary/unary operation.\n");
5253 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5255 prev_stmt_info
= NULL
;
5256 for (j
= 0; j
< ncopies
; j
++)
5261 if (scalar_shift_arg
)
5263 /* Vector shl and shr insn patterns can be defined with scalar
5264 operand 2 (shift operand). In this case, use constant or loop
5265 invariant op1 directly, without extending it to vector mode
5267 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5268 if (!VECTOR_MODE_P (optab_op2_mode
))
5270 if (dump_enabled_p ())
5271 dump_printf_loc (MSG_NOTE
, vect_location
,
5272 "operand 1 using scalar mode.\n");
5274 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5275 vec_oprnds1
.quick_push (vec_oprnd1
);
5278 /* Store vec_oprnd1 for every vector stmt to be created
5279 for SLP_NODE. We check during the analysis that all
5280 the shift arguments are the same.
5281 TODO: Allow different constants for different vector
5282 stmts generated for an SLP instance. */
5283 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5284 vec_oprnds1
.quick_push (vec_oprnd1
);
5289 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5290 (a special case for certain kind of vector shifts); otherwise,
5291 operand 1 should be of a vector type (the usual case). */
5293 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5296 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5300 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5302 /* Arguments are ready. Create the new vector stmt. */
5303 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5305 vop1
= vec_oprnds1
[i
];
5306 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5307 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5308 gimple_assign_set_lhs (new_stmt
, new_temp
);
5309 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5311 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5318 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5320 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5321 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5324 vec_oprnds0
.release ();
5325 vec_oprnds1
.release ();
5331 /* Function vectorizable_operation.
5333 Check if STMT performs a binary, unary or ternary operation that can
5335 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5336 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5337 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5340 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5341 gimple
**vec_stmt
, slp_tree slp_node
)
5345 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5346 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5348 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5349 enum tree_code code
, orig_code
;
5350 machine_mode vec_mode
;
5354 bool target_support_p
;
5356 enum vect_def_type dt
[3]
5357 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5359 gimple
*new_stmt
= NULL
;
5360 stmt_vec_info prev_stmt_info
;
5361 poly_uint64 nunits_in
;
5362 poly_uint64 nunits_out
;
5366 vec
<tree
> vec_oprnds0
= vNULL
;
5367 vec
<tree
> vec_oprnds1
= vNULL
;
5368 vec
<tree
> vec_oprnds2
= vNULL
;
5369 tree vop0
, vop1
, vop2
;
5370 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5371 vec_info
*vinfo
= stmt_info
->vinfo
;
5373 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5376 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5380 /* Is STMT a vectorizable binary/unary operation? */
5381 if (!is_gimple_assign (stmt
))
5384 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5387 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5389 /* For pointer addition and subtraction, we should use the normal
5390 plus and minus for the vector operation. */
5391 if (code
== POINTER_PLUS_EXPR
)
5393 if (code
== POINTER_DIFF_EXPR
)
5396 /* Support only unary or binary operations. */
5397 op_type
= TREE_CODE_LENGTH (code
);
5398 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5400 if (dump_enabled_p ())
5401 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5402 "num. args = %d (not unary/binary/ternary op).\n",
5407 scalar_dest
= gimple_assign_lhs (stmt
);
5408 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5410 /* Most operations cannot handle bit-precision types without extra
5412 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5413 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5414 /* Exception are bitwise binary operations. */
5415 && code
!= BIT_IOR_EXPR
5416 && code
!= BIT_XOR_EXPR
5417 && code
!= BIT_AND_EXPR
)
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5421 "bit-precision arithmetic not supported.\n");
5425 op0
= gimple_assign_rhs1 (stmt
);
5426 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5430 "use not simple.\n");
5433 /* If op0 is an external or constant def use a vector type with
5434 the same size as the output vector type. */
5437 /* For boolean type we cannot determine vectype by
5438 invariant value (don't know whether it is a vector
5439 of booleans or vector of integers). We use output
5440 vectype because operations on boolean don't change
5442 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5444 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5446 if (dump_enabled_p ())
5447 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5448 "not supported operation on bool value.\n");
5451 vectype
= vectype_out
;
5454 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5457 gcc_assert (vectype
);
5460 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5463 "no vectype for scalar type ");
5464 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5466 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5472 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5473 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5474 if (maybe_ne (nunits_out
, nunits_in
))
5477 if (op_type
== binary_op
|| op_type
== ternary_op
)
5479 op1
= gimple_assign_rhs2 (stmt
);
5480 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5482 if (dump_enabled_p ())
5483 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5484 "use not simple.\n");
5488 if (op_type
== ternary_op
)
5490 op2
= gimple_assign_rhs3 (stmt
);
5491 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5493 if (dump_enabled_p ())
5494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5495 "use not simple.\n");
5500 /* Multiple types in SLP are handled by creating the appropriate number of
5501 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5506 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5508 gcc_assert (ncopies
>= 1);
5510 /* Shifts are handled in vectorizable_shift (). */
5511 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5512 || code
== RROTATE_EXPR
)
5515 /* Supportable by target? */
5517 vec_mode
= TYPE_MODE (vectype
);
5518 if (code
== MULT_HIGHPART_EXPR
)
5519 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5522 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5525 if (dump_enabled_p ())
5526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5530 target_support_p
= (optab_handler (optab
, vec_mode
)
5531 != CODE_FOR_nothing
);
5534 if (!target_support_p
)
5536 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5538 "op not supported by target.\n");
5539 /* Check only during analysis. */
5540 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5541 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_NOTE
, vect_location
,
5545 "proceeding using word mode.\n");
5548 /* Worthwhile without SIMD support? Check only during analysis. */
5549 if (!VECTOR_MODE_P (vec_mode
)
5551 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5553 if (dump_enabled_p ())
5554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5555 "not worthwhile without SIMD support.\n");
5559 if (!vec_stmt
) /* transformation not required. */
5561 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5562 if (dump_enabled_p ())
5563 dump_printf_loc (MSG_NOTE
, vect_location
,
5564 "=== vectorizable_operation ===\n");
5565 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5571 if (dump_enabled_p ())
5572 dump_printf_loc (MSG_NOTE
, vect_location
,
5573 "transform binary/unary operation.\n");
5576 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5578 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5579 vectors with unsigned elements, but the result is signed. So, we
5580 need to compute the MINUS_EXPR into vectype temporary and
5581 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5582 tree vec_cvt_dest
= NULL_TREE
;
5583 if (orig_code
== POINTER_DIFF_EXPR
)
5584 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5586 /* In case the vectorization factor (VF) is bigger than the number
5587 of elements that we can fit in a vectype (nunits), we have to generate
5588 more than one vector stmt - i.e - we need to "unroll" the
5589 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5590 from one copy of the vector stmt to the next, in the field
5591 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5592 stages to find the correct vector defs to be used when vectorizing
5593 stmts that use the defs of the current stmt. The example below
5594 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5595 we need to create 4 vectorized stmts):
5597 before vectorization:
5598 RELATED_STMT VEC_STMT
5602 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5604 RELATED_STMT VEC_STMT
5605 VS1_0: vx0 = memref0 VS1_1 -
5606 VS1_1: vx1 = memref1 VS1_2 -
5607 VS1_2: vx2 = memref2 VS1_3 -
5608 VS1_3: vx3 = memref3 - -
5609 S1: x = load - VS1_0
5612 step2: vectorize stmt S2 (done here):
5613 To vectorize stmt S2 we first need to find the relevant vector
5614 def for the first operand 'x'. This is, as usual, obtained from
5615 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5616 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5617 relevant vector def 'vx0'. Having found 'vx0' we can generate
5618 the vector stmt VS2_0, and as usual, record it in the
5619 STMT_VINFO_VEC_STMT of stmt S2.
5620 When creating the second copy (VS2_1), we obtain the relevant vector
5621 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5622 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5623 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5624 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5625 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5626 chain of stmts and pointers:
5627 RELATED_STMT VEC_STMT
5628 VS1_0: vx0 = memref0 VS1_1 -
5629 VS1_1: vx1 = memref1 VS1_2 -
5630 VS1_2: vx2 = memref2 VS1_3 -
5631 VS1_3: vx3 = memref3 - -
5632 S1: x = load - VS1_0
5633 VS2_0: vz0 = vx0 + v1 VS2_1 -
5634 VS2_1: vz1 = vx1 + v1 VS2_2 -
5635 VS2_2: vz2 = vx2 + v1 VS2_3 -
5636 VS2_3: vz3 = vx3 + v1 - -
5637 S2: z = x + 1 - VS2_0 */
5639 prev_stmt_info
= NULL
;
5640 for (j
= 0; j
< ncopies
; j
++)
5645 if (op_type
== binary_op
|| op_type
== ternary_op
)
5646 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5649 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5651 if (op_type
== ternary_op
)
5652 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5657 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5658 if (op_type
== ternary_op
)
5660 tree vec_oprnd
= vec_oprnds2
.pop ();
5661 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5666 /* Arguments are ready. Create the new vector stmt. */
5667 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5669 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5670 ? vec_oprnds1
[i
] : NULL_TREE
);
5671 vop2
= ((op_type
== ternary_op
)
5672 ? vec_oprnds2
[i
] : NULL_TREE
);
5673 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5674 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5675 gimple_assign_set_lhs (new_stmt
, new_temp
);
5676 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5679 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
5680 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
5682 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
5683 gimple_assign_set_lhs (new_stmt
, new_temp
);
5684 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5687 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5694 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5696 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5697 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5700 vec_oprnds0
.release ();
5701 vec_oprnds1
.release ();
5702 vec_oprnds2
.release ();
5707 /* A helper function to ensure data reference DR's base alignment. */
5710 ensure_base_align (struct data_reference
*dr
)
5715 if (DR_VECT_AUX (dr
)->base_misaligned
)
5717 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5719 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
5721 if (decl_in_symtab_p (base_decl
))
5722 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
5725 SET_DECL_ALIGN (base_decl
, align_base_to
);
5726 DECL_USER_ALIGN (base_decl
) = 1;
5728 DR_VECT_AUX (dr
)->base_misaligned
= false;
5733 /* Function get_group_alias_ptr_type.
5735 Return the alias type for the group starting at FIRST_STMT. */
5738 get_group_alias_ptr_type (gimple
*first_stmt
)
5740 struct data_reference
*first_dr
, *next_dr
;
5743 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5744 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5747 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5748 if (get_alias_set (DR_REF (first_dr
))
5749 != get_alias_set (DR_REF (next_dr
)))
5751 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_NOTE
, vect_location
,
5753 "conflicting alias set types.\n");
5754 return ptr_type_node
;
5756 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5758 return reference_alias_ptr_type (DR_REF (first_dr
));
5762 /* Function vectorizable_store.
5764 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5766 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5767 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5768 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5771 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5776 tree vec_oprnd
= NULL_TREE
;
5777 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5778 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5780 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5781 struct loop
*loop
= NULL
;
5782 machine_mode vec_mode
;
5784 enum dr_alignment_support alignment_support_scheme
;
5786 enum vect_def_type dt
;
5787 stmt_vec_info prev_stmt_info
= NULL
;
5788 tree dataref_ptr
= NULL_TREE
;
5789 tree dataref_offset
= NULL_TREE
;
5790 gimple
*ptr_incr
= NULL
;
5793 gimple
*next_stmt
, *first_stmt
;
5795 unsigned int group_size
, i
;
5796 vec
<tree
> oprnds
= vNULL
;
5797 vec
<tree
> result_chain
= vNULL
;
5799 tree offset
= NULL_TREE
;
5800 vec
<tree
> vec_oprnds
= vNULL
;
5801 bool slp
= (slp_node
!= NULL
);
5802 unsigned int vec_num
;
5803 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5804 vec_info
*vinfo
= stmt_info
->vinfo
;
5806 gather_scatter_info gs_info
;
5807 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5810 vec_load_store_type vls_type
;
5813 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5816 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5820 /* Is vectorizable store? */
5822 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
5823 if (is_gimple_assign (stmt
))
5825 tree scalar_dest
= gimple_assign_lhs (stmt
);
5826 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5827 && is_pattern_stmt_p (stmt_info
))
5828 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5829 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5830 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5831 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5832 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5833 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5834 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5835 && TREE_CODE (scalar_dest
) != MEM_REF
)
5840 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
5841 if (!call
|| !gimple_call_internal_p (call
, IFN_MASK_STORE
))
5844 if (slp_node
!= NULL
)
5846 if (dump_enabled_p ())
5847 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5848 "SLP of masked stores not supported.\n");
5852 ref_type
= TREE_TYPE (gimple_call_arg (call
, 1));
5853 mask
= gimple_call_arg (call
, 2);
5854 if (!vect_check_load_store_mask (stmt
, mask
, &mask_vectype
))
5858 op
= vect_get_store_rhs (stmt
);
5860 /* Cannot have hybrid store SLP -- that would mean storing to the
5861 same location twice. */
5862 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5864 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5865 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5869 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5870 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5875 /* Multiple types in SLP are handled by creating the appropriate number of
5876 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5881 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5883 gcc_assert (ncopies
>= 1);
5885 /* FORNOW. This restriction should be relaxed. */
5886 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5888 if (dump_enabled_p ())
5889 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5890 "multiple types in nested loop.\n");
5894 if (!vect_check_store_rhs (stmt
, op
, &rhs_vectype
, &vls_type
))
5897 elem_type
= TREE_TYPE (vectype
);
5898 vec_mode
= TYPE_MODE (vectype
);
5900 if (!STMT_VINFO_DATA_REF (stmt_info
))
5903 vect_memory_access_type memory_access_type
;
5904 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, vls_type
, ncopies
,
5905 &memory_access_type
, &gs_info
))
5910 if (memory_access_type
== VMAT_CONTIGUOUS
)
5912 if (!VECTOR_MODE_P (vec_mode
)
5913 || !can_vec_mask_load_store_p (vec_mode
,
5914 TYPE_MODE (mask_vectype
), false))
5917 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5921 "unsupported access type for masked store.\n");
5927 /* FORNOW. In some cases can vectorize even if data-type not supported
5928 (e.g. - array initialization with 0). */
5929 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5933 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5936 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5937 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5938 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5944 group_size
= vec_num
= 1;
5947 if (!vec_stmt
) /* transformation not required. */
5949 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5952 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
5953 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
5954 memory_access_type
);
5956 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5957 /* The SLP costs are calculated during SLP analysis. */
5958 if (!PURE_SLP_STMT (stmt_info
))
5959 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
5960 vls_type
, NULL
, NULL
, NULL
);
5963 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5967 ensure_base_align (dr
);
5969 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5971 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
5972 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5973 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5974 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5975 edge pe
= loop_preheader_edge (loop
);
5978 enum { NARROW
, NONE
, WIDEN
} modifier
;
5979 poly_uint64 scatter_off_nunits
5980 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5982 if (known_eq (nunits
, scatter_off_nunits
))
5984 else if (known_eq (nunits
* 2, scatter_off_nunits
))
5988 /* Currently gathers and scatters are only supported for
5989 fixed-length vectors. */
5990 unsigned int count
= scatter_off_nunits
.to_constant ();
5991 vec_perm_builder
sel (count
, count
, 1);
5992 for (i
= 0; i
< (unsigned int) count
; ++i
)
5993 sel
.quick_push (i
| (count
/ 2));
5995 vec_perm_indices
indices (sel
, 1, count
);
5996 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
5998 gcc_assert (perm_mask
!= NULL_TREE
);
6000 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6004 /* Currently gathers and scatters are only supported for
6005 fixed-length vectors. */
6006 unsigned int count
= nunits
.to_constant ();
6007 vec_perm_builder
sel (count
, count
, 1);
6008 for (i
= 0; i
< (unsigned int) count
; ++i
)
6009 sel
.quick_push (i
| (count
/ 2));
6011 vec_perm_indices
indices (sel
, 2, count
);
6012 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6013 gcc_assert (perm_mask
!= NULL_TREE
);
6019 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6020 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6021 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6022 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6023 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6024 scaletype
= TREE_VALUE (arglist
);
6026 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6027 && TREE_CODE (rettype
) == VOID_TYPE
);
6029 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6030 if (!is_gimple_min_invariant (ptr
))
6032 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6033 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6034 gcc_assert (!new_bb
);
6037 /* Currently we support only unconditional scatter stores,
6038 so mask should be all ones. */
6039 mask
= build_int_cst (masktype
, -1);
6040 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6042 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6044 prev_stmt_info
= NULL
;
6045 for (j
= 0; j
< ncopies
; ++j
)
6050 = vect_get_vec_def_for_operand (op
, stmt
);
6052 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6054 else if (modifier
!= NONE
&& (j
& 1))
6056 if (modifier
== WIDEN
)
6059 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
6060 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6063 else if (modifier
== NARROW
)
6065 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6068 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6077 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
6079 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6083 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6085 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6086 TYPE_VECTOR_SUBPARTS (srctype
)));
6087 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6088 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6089 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6090 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6094 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6096 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6097 TYPE_VECTOR_SUBPARTS (idxtype
)));
6098 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6099 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6100 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6101 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6106 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
6108 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6110 if (prev_stmt_info
== NULL
)
6111 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6113 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6114 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6121 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
6124 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
6126 /* We vectorize all the stmts of the interleaving group when we
6127 reach the last stmt in the group. */
6128 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
6129 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
6138 grouped_store
= false;
6139 /* VEC_NUM is the number of vect stmts to be created for this
6141 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6142 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6143 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
6144 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6145 op
= vect_get_store_rhs (first_stmt
);
6148 /* VEC_NUM is the number of vect stmts to be created for this
6150 vec_num
= group_size
;
6152 ref_type
= get_group_alias_ptr_type (first_stmt
);
6155 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6157 if (dump_enabled_p ())
6158 dump_printf_loc (MSG_NOTE
, vect_location
,
6159 "transform store. ncopies = %d\n", ncopies
);
6161 if (memory_access_type
== VMAT_ELEMENTWISE
6162 || memory_access_type
== VMAT_STRIDED_SLP
)
6164 gimple_stmt_iterator incr_gsi
;
6170 gimple_seq stmts
= NULL
;
6171 tree stride_base
, stride_step
, alias_off
;
6174 /* Checked by get_load_store_type. */
6175 unsigned int const_nunits
= nunits
.to_constant ();
6177 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6178 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6181 = fold_build_pointer_plus
6182 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6183 size_binop (PLUS_EXPR
,
6184 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6185 convert_to_ptrofftype (DR_INIT (first_dr
))));
6186 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
6188 /* For a store with loop-invariant (but other than power-of-2)
6189 stride (i.e. not a grouped access) like so:
6191 for (i = 0; i < n; i += stride)
6194 we generate a new induction variable and new stores from
6195 the components of the (vectorized) rhs:
6197 for (j = 0; ; j += VF*stride)
6202 array[j + stride] = tmp2;
6206 unsigned nstores
= const_nunits
;
6208 tree ltype
= elem_type
;
6209 tree lvectype
= vectype
;
6212 if (group_size
< const_nunits
6213 && const_nunits
% group_size
== 0)
6215 nstores
= const_nunits
/ group_size
;
6217 ltype
= build_vector_type (elem_type
, group_size
);
6220 /* First check if vec_extract optab doesn't support extraction
6221 of vector elts directly. */
6222 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6224 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6225 || !VECTOR_MODE_P (vmode
)
6226 || (convert_optab_handler (vec_extract_optab
,
6227 TYPE_MODE (vectype
), vmode
)
6228 == CODE_FOR_nothing
))
6230 /* Try to avoid emitting an extract of vector elements
6231 by performing the extracts using an integer type of the
6232 same size, extracting from a vector of those and then
6233 re-interpreting it as the original vector type if
6236 = group_size
* GET_MODE_BITSIZE (elmode
);
6237 elmode
= int_mode_for_size (lsize
, 0).require ();
6238 unsigned int lnunits
= const_nunits
/ group_size
;
6239 /* If we can't construct such a vector fall back to
6240 element extracts from the original vector type and
6241 element size stores. */
6242 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6243 && VECTOR_MODE_P (vmode
)
6244 && (convert_optab_handler (vec_extract_optab
,
6246 != CODE_FOR_nothing
))
6250 ltype
= build_nonstandard_integer_type (lsize
, 1);
6251 lvectype
= build_vector_type (ltype
, nstores
);
6253 /* Else fall back to vector extraction anyway.
6254 Fewer stores are more important than avoiding spilling
6255 of the vector we extract from. Compared to the
6256 construction case in vectorizable_load no store-forwarding
6257 issue exists here for reasonable archs. */
6260 else if (group_size
>= const_nunits
6261 && group_size
% const_nunits
== 0)
6264 lnel
= const_nunits
;
6268 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6269 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6272 ivstep
= stride_step
;
6273 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6274 build_int_cst (TREE_TYPE (ivstep
), vf
));
6276 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6278 create_iv (stride_base
, ivstep
, NULL
,
6279 loop
, &incr_gsi
, insert_after
,
6281 incr
= gsi_stmt (incr_gsi
);
6282 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6284 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6286 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6288 prev_stmt_info
= NULL
;
6289 alias_off
= build_int_cst (ref_type
, 0);
6290 next_stmt
= first_stmt
;
6291 for (g
= 0; g
< group_size
; g
++)
6293 running_off
= offvar
;
6296 tree size
= TYPE_SIZE_UNIT (ltype
);
6297 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6299 tree newoff
= copy_ssa_name (running_off
, NULL
);
6300 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6302 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6303 running_off
= newoff
;
6305 unsigned int group_el
= 0;
6306 unsigned HOST_WIDE_INT
6307 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6308 for (j
= 0; j
< ncopies
; j
++)
6310 /* We've set op and dt above, from vect_get_store_rhs,
6311 and first_stmt == stmt. */
6316 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6318 vec_oprnd
= vec_oprnds
[0];
6322 op
= vect_get_store_rhs (next_stmt
);
6323 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6329 vec_oprnd
= vec_oprnds
[j
];
6332 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6333 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6336 /* Pun the vector to extract from if necessary. */
6337 if (lvectype
!= vectype
)
6339 tree tem
= make_ssa_name (lvectype
);
6341 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6342 lvectype
, vec_oprnd
));
6343 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6346 for (i
= 0; i
< nstores
; i
++)
6348 tree newref
, newoff
;
6349 gimple
*incr
, *assign
;
6350 tree size
= TYPE_SIZE (ltype
);
6351 /* Extract the i'th component. */
6352 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6353 bitsize_int (i
), size
);
6354 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6357 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6361 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6363 newref
= build2 (MEM_REF
, ltype
,
6364 running_off
, this_off
);
6366 /* And store it to *running_off. */
6367 assign
= gimple_build_assign (newref
, elem
);
6368 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6372 || group_el
== group_size
)
6374 newoff
= copy_ssa_name (running_off
, NULL
);
6375 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6376 running_off
, stride_step
);
6377 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6379 running_off
= newoff
;
6382 if (g
== group_size
- 1
6385 if (j
== 0 && i
== 0)
6386 STMT_VINFO_VEC_STMT (stmt_info
)
6387 = *vec_stmt
= assign
;
6389 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6390 prev_stmt_info
= vinfo_for_stmt (assign
);
6394 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6399 vec_oprnds
.release ();
6403 auto_vec
<tree
> dr_chain (group_size
);
6404 oprnds
.create (group_size
);
6406 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6407 gcc_assert (alignment_support_scheme
);
6408 bool masked_loop_p
= (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6409 /* Targets with store-lane instructions must not require explicit
6410 realignment. vect_supportable_dr_alignment always returns either
6411 dr_aligned or dr_unaligned_supported for masked operations. */
6412 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6415 || alignment_support_scheme
== dr_aligned
6416 || alignment_support_scheme
== dr_unaligned_supported
);
6418 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6419 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6420 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6422 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6423 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6425 aggr_type
= vectype
;
6428 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
6430 /* In case the vectorization factor (VF) is bigger than the number
6431 of elements that we can fit in a vectype (nunits), we have to generate
6432 more than one vector stmt - i.e - we need to "unroll" the
6433 vector stmt by a factor VF/nunits. For more details see documentation in
6434 vect_get_vec_def_for_copy_stmt. */
6436 /* In case of interleaving (non-unit grouped access):
6443 We create vectorized stores starting from base address (the access of the
6444 first stmt in the chain (S2 in the above example), when the last store stmt
6445 of the chain (S4) is reached:
6448 VS2: &base + vec_size*1 = vx0
6449 VS3: &base + vec_size*2 = vx1
6450 VS4: &base + vec_size*3 = vx3
6452 Then permutation statements are generated:
6454 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6455 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6458 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6459 (the order of the data-refs in the output of vect_permute_store_chain
6460 corresponds to the order of scalar stmts in the interleaving chain - see
6461 the documentation of vect_permute_store_chain()).
6463 In case of both multiple types and interleaving, above vector stores and
6464 permutation stmts are created for every copy. The result vector stmts are
6465 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6466 STMT_VINFO_RELATED_STMT for the next copies.
6469 prev_stmt_info
= NULL
;
6470 tree vec_mask
= NULL_TREE
;
6471 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
6472 for (j
= 0; j
< ncopies
; j
++)
6479 /* Get vectorized arguments for SLP_NODE. */
6480 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6483 vec_oprnd
= vec_oprnds
[0];
6487 /* For interleaved stores we collect vectorized defs for all the
6488 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6489 used as an input to vect_permute_store_chain(), and OPRNDS as
6490 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6492 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6493 OPRNDS are of size 1. */
6494 next_stmt
= first_stmt
;
6495 for (i
= 0; i
< group_size
; i
++)
6497 /* Since gaps are not supported for interleaved stores,
6498 GROUP_SIZE is the exact number of stmts in the chain.
6499 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6500 there is no interleaving, GROUP_SIZE is 1, and only one
6501 iteration of the loop will be executed. */
6502 op
= vect_get_store_rhs (next_stmt
);
6503 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6504 dr_chain
.quick_push (vec_oprnd
);
6505 oprnds
.quick_push (vec_oprnd
);
6506 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6509 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
6513 /* We should have catched mismatched types earlier. */
6514 gcc_assert (useless_type_conversion_p (vectype
,
6515 TREE_TYPE (vec_oprnd
)));
6516 bool simd_lane_access_p
6517 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6518 if (simd_lane_access_p
6519 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6520 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6521 && integer_zerop (DR_OFFSET (first_dr
))
6522 && integer_zerop (DR_INIT (first_dr
))
6523 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6524 get_alias_set (TREE_TYPE (ref_type
))))
6526 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6527 dataref_offset
= build_int_cst (ref_type
, 0);
6532 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6533 simd_lane_access_p
? loop
: NULL
,
6534 offset
, &dummy
, gsi
, &ptr_incr
,
6535 simd_lane_access_p
, &inv_p
);
6536 gcc_assert (bb_vinfo
|| !inv_p
);
6540 /* For interleaved stores we created vectorized defs for all the
6541 defs stored in OPRNDS in the previous iteration (previous copy).
6542 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6543 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6545 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6546 OPRNDS are of size 1. */
6547 for (i
= 0; i
< group_size
; i
++)
6550 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6551 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6552 dr_chain
[i
] = vec_oprnd
;
6553 oprnds
[i
] = vec_oprnd
;
6557 vect_is_simple_use (vec_mask
, vinfo
, &def_stmt
, &dt
);
6558 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
6562 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6563 TYPE_SIZE_UNIT (aggr_type
));
6565 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6566 TYPE_SIZE_UNIT (aggr_type
));
6569 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6573 /* Combine all the vectors into an array. */
6574 vec_array
= create_vector_array (vectype
, vec_num
);
6575 for (i
= 0; i
< vec_num
; i
++)
6577 vec_oprnd
= dr_chain
[i
];
6578 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6581 tree final_mask
= NULL
;
6583 final_mask
= vect_get_loop_mask (gsi
, masks
, ncopies
, vectype
, j
);
6585 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
6592 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6594 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
6595 tree alias_ptr
= build_int_cst (ref_type
, align
);
6596 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
6597 dataref_ptr
, alias_ptr
,
6598 final_mask
, vec_array
);
6603 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6604 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6605 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6607 gimple_call_set_lhs (call
, data_ref
);
6609 gimple_call_set_nothrow (call
, true);
6611 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6619 result_chain
.create (group_size
);
6621 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6625 next_stmt
= first_stmt
;
6626 for (i
= 0; i
< vec_num
; i
++)
6628 unsigned align
, misalign
;
6630 tree final_mask
= NULL_TREE
;
6632 final_mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6633 vectype
, vec_num
* j
+ i
);
6635 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
6639 /* Bump the vector pointer. */
6640 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6644 vec_oprnd
= vec_oprnds
[i
];
6645 else if (grouped_store
)
6646 /* For grouped stores vectorized defs are interleaved in
6647 vect_permute_store_chain(). */
6648 vec_oprnd
= result_chain
[i
];
6650 align
= DR_TARGET_ALIGNMENT (first_dr
);
6651 if (aligned_access_p (first_dr
))
6653 else if (DR_MISALIGNMENT (first_dr
) == -1)
6655 align
= dr_alignment (vect_dr_behavior (first_dr
));
6659 misalign
= DR_MISALIGNMENT (first_dr
);
6660 if (dataref_offset
== NULL_TREE
6661 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6662 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6665 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6667 tree perm_mask
= perm_mask_for_reverse (vectype
);
6669 = vect_create_destination_var (vect_get_store_rhs (stmt
),
6671 tree new_temp
= make_ssa_name (perm_dest
);
6673 /* Generate the permute statement. */
6675 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6676 vec_oprnd
, perm_mask
);
6677 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6679 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6680 vec_oprnd
= new_temp
;
6683 /* Arguments are ready. Create the new vector stmt. */
6686 align
= least_bit_hwi (misalign
| align
);
6687 tree ptr
= build_int_cst (ref_type
, align
);
6689 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
6691 final_mask
, vec_oprnd
);
6692 gimple_call_set_nothrow (call
, true);
6697 data_ref
= fold_build2 (MEM_REF
, vectype
,
6701 : build_int_cst (ref_type
, 0));
6702 if (aligned_access_p (first_dr
))
6704 else if (DR_MISALIGNMENT (first_dr
) == -1)
6705 TREE_TYPE (data_ref
)
6706 = build_aligned_type (TREE_TYPE (data_ref
),
6707 align
* BITS_PER_UNIT
);
6709 TREE_TYPE (data_ref
)
6710 = build_aligned_type (TREE_TYPE (data_ref
),
6711 TYPE_ALIGN (elem_type
));
6712 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6714 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6719 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6727 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6729 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6730 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6735 result_chain
.release ();
6736 vec_oprnds
.release ();
6741 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6742 VECTOR_CST mask. No checks are made that the target platform supports the
6743 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6744 vect_gen_perm_mask_checked. */
6747 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
6751 poly_uint64 nunits
= sel
.length ();
6752 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
6754 mask_type
= build_vector_type (ssizetype
, nunits
);
6755 return vec_perm_indices_to_tree (mask_type
, sel
);
6758 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6759 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6762 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
6764 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
6765 return vect_gen_perm_mask_any (vectype
, sel
);
6768 /* Given a vector variable X and Y, that was generated for the scalar
6769 STMT, generate instructions to permute the vector elements of X and Y
6770 using permutation mask MASK_VEC, insert them at *GSI and return the
6771 permuted vector variable. */
6774 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6775 gimple_stmt_iterator
*gsi
)
6777 tree vectype
= TREE_TYPE (x
);
6778 tree perm_dest
, data_ref
;
6781 tree scalar_dest
= gimple_get_lhs (stmt
);
6782 if (TREE_CODE (scalar_dest
) == SSA_NAME
)
6783 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6785 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
6786 data_ref
= make_ssa_name (perm_dest
);
6788 /* Generate the permute statement. */
6789 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6790 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6795 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6796 inserting them on the loops preheader edge. Returns true if we
6797 were successful in doing so (and thus STMT can be moved then),
6798 otherwise returns false. */
6801 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6807 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6809 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6810 if (!gimple_nop_p (def_stmt
)
6811 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6813 /* Make sure we don't need to recurse. While we could do
6814 so in simple cases when there are more complex use webs
6815 we don't have an easy way to preserve stmt order to fulfil
6816 dependencies within them. */
6819 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6821 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6823 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6824 if (!gimple_nop_p (def_stmt2
)
6825 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6835 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6837 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6838 if (!gimple_nop_p (def_stmt
)
6839 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6841 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6842 gsi_remove (&gsi
, false);
6843 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6850 /* vectorizable_load.
6852 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6855 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6859 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6860 slp_tree slp_node
, slp_instance slp_node_instance
)
6863 tree vec_dest
= NULL
;
6864 tree data_ref
= NULL
;
6865 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6866 stmt_vec_info prev_stmt_info
;
6867 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6868 struct loop
*loop
= NULL
;
6869 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6870 bool nested_in_vect_loop
= false;
6871 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6875 gimple
*new_stmt
= NULL
;
6877 enum dr_alignment_support alignment_support_scheme
;
6878 tree dataref_ptr
= NULL_TREE
;
6879 tree dataref_offset
= NULL_TREE
;
6880 gimple
*ptr_incr
= NULL
;
6883 unsigned int group_size
;
6884 poly_uint64 group_gap_adj
;
6885 tree msq
= NULL_TREE
, lsq
;
6886 tree offset
= NULL_TREE
;
6887 tree byte_offset
= NULL_TREE
;
6888 tree realignment_token
= NULL_TREE
;
6890 vec
<tree
> dr_chain
= vNULL
;
6891 bool grouped_load
= false;
6893 gimple
*first_stmt_for_drptr
= NULL
;
6895 bool compute_in_loop
= false;
6896 struct loop
*at_loop
;
6898 bool slp
= (slp_node
!= NULL
);
6899 bool slp_perm
= false;
6900 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6903 gather_scatter_info gs_info
;
6904 vec_info
*vinfo
= stmt_info
->vinfo
;
6907 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6910 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6914 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6915 if (is_gimple_assign (stmt
))
6917 scalar_dest
= gimple_assign_lhs (stmt
);
6918 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6921 tree_code code
= gimple_assign_rhs_code (stmt
);
6922 if (code
!= ARRAY_REF
6923 && code
!= BIT_FIELD_REF
6924 && code
!= INDIRECT_REF
6925 && code
!= COMPONENT_REF
6926 && code
!= IMAGPART_EXPR
6927 && code
!= REALPART_EXPR
6929 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6934 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
6935 if (!call
|| !gimple_call_internal_p (call
, IFN_MASK_LOAD
))
6938 scalar_dest
= gimple_call_lhs (call
);
6942 if (slp_node
!= NULL
)
6944 if (dump_enabled_p ())
6945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6946 "SLP of masked loads not supported.\n");
6950 mask
= gimple_call_arg (call
, 2);
6951 if (!vect_check_load_store_mask (stmt
, mask
, &mask_vectype
))
6955 if (!STMT_VINFO_DATA_REF (stmt_info
))
6958 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6959 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6963 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6964 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6965 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6970 /* Multiple types in SLP are handled by creating the appropriate number of
6971 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6976 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6978 gcc_assert (ncopies
>= 1);
6980 /* FORNOW. This restriction should be relaxed. */
6981 if (nested_in_vect_loop
&& ncopies
> 1)
6983 if (dump_enabled_p ())
6984 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6985 "multiple types in nested loop.\n");
6989 /* Invalidate assumptions made by dependence analysis when vectorization
6990 on the unrolled body effectively re-orders stmts. */
6992 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6993 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
6994 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6996 if (dump_enabled_p ())
6997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6998 "cannot perform implicit CSE when unrolling "
6999 "with negative dependence distance\n");
7003 elem_type
= TREE_TYPE (vectype
);
7004 mode
= TYPE_MODE (vectype
);
7006 /* FORNOW. In some cases can vectorize even if data-type not supported
7007 (e.g. - data copies). */
7008 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7010 if (dump_enabled_p ())
7011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7012 "Aligned load, but unsupported type.\n");
7016 /* Check if the load is a part of an interleaving chain. */
7017 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7019 grouped_load
= true;
7021 gcc_assert (!nested_in_vect_loop
);
7022 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7024 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7025 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7027 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7030 /* Invalidate assumptions made by dependence analysis when vectorization
7031 on the unrolled body effectively re-orders stmts. */
7032 if (!PURE_SLP_STMT (stmt_info
)
7033 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7034 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7035 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7037 if (dump_enabled_p ())
7038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7039 "cannot perform implicit CSE when performing "
7040 "group loads with negative dependence distance\n");
7044 /* Similarly when the stmt is a load that is both part of a SLP
7045 instance and a loop vectorized stmt via the same-dr mechanism
7046 we have to give up. */
7047 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
7048 && (STMT_SLP_TYPE (stmt_info
)
7049 != STMT_SLP_TYPE (vinfo_for_stmt
7050 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
7052 if (dump_enabled_p ())
7053 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7054 "conflicting SLP types for CSEd load\n");
7061 vect_memory_access_type memory_access_type
;
7062 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7063 &memory_access_type
, &gs_info
))
7068 if (memory_access_type
== VMAT_CONTIGUOUS
)
7070 machine_mode vec_mode
= TYPE_MODE (vectype
);
7071 if (!VECTOR_MODE_P (vec_mode
)
7072 || !can_vec_mask_load_store_p (vec_mode
,
7073 TYPE_MODE (mask_vectype
), true))
7076 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7078 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7080 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7081 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7083 if (dump_enabled_p ())
7084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7085 "masked gather with integer mask not"
7090 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7092 if (dump_enabled_p ())
7093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7094 "unsupported access type for masked load.\n");
7099 if (!vec_stmt
) /* transformation not required. */
7102 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7105 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7106 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7107 memory_access_type
);
7109 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7110 /* The SLP costs are calculated during SLP analysis. */
7111 if (!PURE_SLP_STMT (stmt_info
))
7112 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7118 gcc_assert (memory_access_type
7119 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7121 if (dump_enabled_p ())
7122 dump_printf_loc (MSG_NOTE
, vect_location
,
7123 "transform load. ncopies = %d\n", ncopies
);
7127 ensure_base_align (dr
);
7129 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7131 vect_build_gather_load_calls (stmt
, gsi
, vec_stmt
, &gs_info
, mask
);
7135 if (memory_access_type
== VMAT_ELEMENTWISE
7136 || memory_access_type
== VMAT_STRIDED_SLP
)
7138 gimple_stmt_iterator incr_gsi
;
7144 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7145 gimple_seq stmts
= NULL
;
7146 tree stride_base
, stride_step
, alias_off
;
7147 /* Checked by get_load_store_type. */
7148 unsigned int const_nunits
= nunits
.to_constant ();
7150 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7151 gcc_assert (!nested_in_vect_loop
);
7153 if (slp
&& grouped_load
)
7155 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7156 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7157 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7158 ref_type
= get_group_alias_ptr_type (first_stmt
);
7165 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7169 = fold_build_pointer_plus
7170 (DR_BASE_ADDRESS (first_dr
),
7171 size_binop (PLUS_EXPR
,
7172 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7173 convert_to_ptrofftype (DR_INIT (first_dr
))));
7174 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7176 /* For a load with loop-invariant (but other than power-of-2)
7177 stride (i.e. not a grouped access) like so:
7179 for (i = 0; i < n; i += stride)
7182 we generate a new induction variable and new accesses to
7183 form a new vector (or vectors, depending on ncopies):
7185 for (j = 0; ; j += VF*stride)
7187 tmp2 = array[j + stride];
7189 vectemp = {tmp1, tmp2, ...}
7192 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7193 build_int_cst (TREE_TYPE (stride_step
), vf
));
7195 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7197 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7198 loop
, &incr_gsi
, insert_after
,
7200 incr
= gsi_stmt (incr_gsi
);
7201 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7203 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7204 &stmts
, true, NULL_TREE
);
7206 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7208 prev_stmt_info
= NULL
;
7209 running_off
= offvar
;
7210 alias_off
= build_int_cst (ref_type
, 0);
7211 int nloads
= const_nunits
;
7213 tree ltype
= TREE_TYPE (vectype
);
7214 tree lvectype
= vectype
;
7215 auto_vec
<tree
> dr_chain
;
7216 if (memory_access_type
== VMAT_STRIDED_SLP
)
7218 if (group_size
< const_nunits
)
7220 /* First check if vec_init optab supports construction from
7221 vector elts directly. */
7222 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7224 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7225 && VECTOR_MODE_P (vmode
)
7226 && (convert_optab_handler (vec_init_optab
,
7227 TYPE_MODE (vectype
), vmode
)
7228 != CODE_FOR_nothing
))
7230 nloads
= const_nunits
/ group_size
;
7232 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7236 /* Otherwise avoid emitting a constructor of vector elements
7237 by performing the loads using an integer type of the same
7238 size, constructing a vector of those and then
7239 re-interpreting it as the original vector type.
7240 This avoids a huge runtime penalty due to the general
7241 inability to perform store forwarding from smaller stores
7242 to a larger load. */
7244 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7245 elmode
= int_mode_for_size (lsize
, 0).require ();
7246 unsigned int lnunits
= const_nunits
/ group_size
;
7247 /* If we can't construct such a vector fall back to
7248 element loads of the original vector type. */
7249 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7250 && VECTOR_MODE_P (vmode
)
7251 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7252 != CODE_FOR_nothing
))
7256 ltype
= build_nonstandard_integer_type (lsize
, 1);
7257 lvectype
= build_vector_type (ltype
, nloads
);
7264 lnel
= const_nunits
;
7267 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7271 /* For SLP permutation support we need to load the whole group,
7272 not only the number of vector stmts the permutation result
7276 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7278 unsigned int const_vf
= vf
.to_constant ();
7279 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7280 dr_chain
.create (ncopies
);
7283 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7285 unsigned int group_el
= 0;
7286 unsigned HOST_WIDE_INT
7287 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7288 for (j
= 0; j
< ncopies
; j
++)
7291 vec_alloc (v
, nloads
);
7292 for (i
= 0; i
< nloads
; i
++)
7294 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7296 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7297 build2 (MEM_REF
, ltype
,
7298 running_off
, this_off
));
7299 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7301 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7302 gimple_assign_lhs (new_stmt
));
7306 || group_el
== group_size
)
7308 tree newoff
= copy_ssa_name (running_off
);
7309 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7310 running_off
, stride_step
);
7311 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7313 running_off
= newoff
;
7319 tree vec_inv
= build_constructor (lvectype
, v
);
7320 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7321 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7322 if (lvectype
!= vectype
)
7324 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7326 build1 (VIEW_CONVERT_EXPR
,
7327 vectype
, new_temp
));
7328 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7335 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7337 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7342 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7344 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7345 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7351 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7352 slp_node_instance
, false, &n_perms
);
7359 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7360 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7361 /* For SLP vectorization we directly vectorize a subchain
7362 without permutation. */
7363 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7364 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7365 /* For BB vectorization always use the first stmt to base
7366 the data ref pointer on. */
7368 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7370 /* Check if the chain of loads is already vectorized. */
7371 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7372 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7373 ??? But we can only do so if there is exactly one
7374 as we have no way to get at the rest. Leave the CSE
7376 ??? With the group load eventually participating
7377 in multiple different permutations (having multiple
7378 slp nodes which refer to the same group) the CSE
7379 is even wrong code. See PR56270. */
7382 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7385 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7388 /* VEC_NUM is the number of vect stmts to be created for this group. */
7391 grouped_load
= false;
7392 /* For SLP permutation support we need to load the whole group,
7393 not only the number of vector stmts the permutation result
7397 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7399 unsigned int const_vf
= vf
.to_constant ();
7400 unsigned int const_nunits
= nunits
.to_constant ();
7401 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7402 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7406 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7408 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7412 vec_num
= group_size
;
7414 ref_type
= get_group_alias_ptr_type (first_stmt
);
7420 group_size
= vec_num
= 1;
7422 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7425 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7426 gcc_assert (alignment_support_scheme
);
7427 bool masked_loop_p
= (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7428 /* Targets with store-lane instructions must not require explicit
7429 realignment. vect_supportable_dr_alignment always returns either
7430 dr_aligned or dr_unaligned_supported for masked operations. */
7431 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7434 || alignment_support_scheme
== dr_aligned
7435 || alignment_support_scheme
== dr_unaligned_supported
);
7437 /* In case the vectorization factor (VF) is bigger than the number
7438 of elements that we can fit in a vectype (nunits), we have to generate
7439 more than one vector stmt - i.e - we need to "unroll" the
7440 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7441 from one copy of the vector stmt to the next, in the field
7442 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7443 stages to find the correct vector defs to be used when vectorizing
7444 stmts that use the defs of the current stmt. The example below
7445 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7446 need to create 4 vectorized stmts):
7448 before vectorization:
7449 RELATED_STMT VEC_STMT
7453 step 1: vectorize stmt S1:
7454 We first create the vector stmt VS1_0, and, as usual, record a
7455 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7456 Next, we create the vector stmt VS1_1, and record a pointer to
7457 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7458 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7460 RELATED_STMT VEC_STMT
7461 VS1_0: vx0 = memref0 VS1_1 -
7462 VS1_1: vx1 = memref1 VS1_2 -
7463 VS1_2: vx2 = memref2 VS1_3 -
7464 VS1_3: vx3 = memref3 - -
7465 S1: x = load - VS1_0
7468 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7469 information we recorded in RELATED_STMT field is used to vectorize
7472 /* In case of interleaving (non-unit grouped access):
7479 Vectorized loads are created in the order of memory accesses
7480 starting from the access of the first stmt of the chain:
7483 VS2: vx1 = &base + vec_size*1
7484 VS3: vx3 = &base + vec_size*2
7485 VS4: vx4 = &base + vec_size*3
7487 Then permutation statements are generated:
7489 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7490 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7493 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7494 (the order of the data-refs in the output of vect_permute_load_chain
7495 corresponds to the order of scalar stmts in the interleaving chain - see
7496 the documentation of vect_permute_load_chain()).
7497 The generation of permutation stmts and recording them in
7498 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7500 In case of both multiple types and interleaving, the vector loads and
7501 permutation stmts above are created for every copy. The result vector
7502 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7503 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7505 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7506 on a target that supports unaligned accesses (dr_unaligned_supported)
7507 we generate the following code:
7511 p = p + indx * vectype_size;
7516 Otherwise, the data reference is potentially unaligned on a target that
7517 does not support unaligned accesses (dr_explicit_realign_optimized) -
7518 then generate the following code, in which the data in each iteration is
7519 obtained by two vector loads, one from the previous iteration, and one
7520 from the current iteration:
7522 msq_init = *(floor(p1))
7523 p2 = initial_addr + VS - 1;
7524 realignment_token = call target_builtin;
7527 p2 = p2 + indx * vectype_size
7529 vec_dest = realign_load (msq, lsq, realignment_token)
7534 /* If the misalignment remains the same throughout the execution of the
7535 loop, we can create the init_addr and permutation mask at the loop
7536 preheader. Otherwise, it needs to be created inside the loop.
7537 This can only occur when vectorizing memory accesses in the inner-loop
7538 nested within an outer-loop that is being vectorized. */
7540 if (nested_in_vect_loop
7541 && !multiple_p (DR_STEP_ALIGNMENT (dr
),
7542 GET_MODE_SIZE (TYPE_MODE (vectype
))))
7544 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7545 compute_in_loop
= true;
7548 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7549 || alignment_support_scheme
== dr_explicit_realign
)
7550 && !compute_in_loop
)
7552 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7553 alignment_support_scheme
, NULL_TREE
,
7555 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7557 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7558 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7565 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7566 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7568 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7569 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7571 aggr_type
= vectype
;
7573 tree vec_mask
= NULL_TREE
;
7574 prev_stmt_info
= NULL
;
7575 poly_uint64 group_elt
= 0;
7576 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
7577 for (j
= 0; j
< ncopies
; j
++)
7579 /* 1. Create the vector or array pointer update chain. */
7582 bool simd_lane_access_p
7583 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7584 if (simd_lane_access_p
7585 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7586 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7587 && integer_zerop (DR_OFFSET (first_dr
))
7588 && integer_zerop (DR_INIT (first_dr
))
7589 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7590 get_alias_set (TREE_TYPE (ref_type
)))
7591 && (alignment_support_scheme
== dr_aligned
7592 || alignment_support_scheme
== dr_unaligned_supported
))
7594 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7595 dataref_offset
= build_int_cst (ref_type
, 0);
7598 else if (first_stmt_for_drptr
7599 && first_stmt
!= first_stmt_for_drptr
)
7602 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7603 at_loop
, offset
, &dummy
, gsi
,
7604 &ptr_incr
, simd_lane_access_p
,
7605 &inv_p
, byte_offset
);
7606 /* Adjust the pointer by the difference to first_stmt. */
7607 data_reference_p ptrdr
7608 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7609 tree diff
= fold_convert (sizetype
,
7610 size_binop (MINUS_EXPR
,
7613 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7618 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7619 offset
, &dummy
, gsi
, &ptr_incr
,
7620 simd_lane_access_p
, &inv_p
,
7623 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
7629 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7630 TYPE_SIZE_UNIT (aggr_type
));
7632 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7633 TYPE_SIZE_UNIT (aggr_type
));
7638 vect_is_simple_use (vec_mask
, vinfo
, &def_stmt
, &dt
);
7639 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
7643 if (grouped_load
|| slp_perm
)
7644 dr_chain
.create (vec_num
);
7646 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7650 vec_array
= create_vector_array (vectype
, vec_num
);
7652 tree final_mask
= NULL_TREE
;
7654 final_mask
= vect_get_loop_mask (gsi
, masks
, ncopies
, vectype
, j
);
7656 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7663 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7665 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7666 tree alias_ptr
= build_int_cst (ref_type
, align
);
7667 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
7668 dataref_ptr
, alias_ptr
,
7674 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7675 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7676 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7678 gimple_call_set_lhs (call
, vec_array
);
7679 gimple_call_set_nothrow (call
, true);
7681 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7683 /* Extract each vector into an SSA_NAME. */
7684 for (i
= 0; i
< vec_num
; i
++)
7686 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7688 dr_chain
.quick_push (new_temp
);
7691 /* Record the mapping between SSA_NAMEs and statements. */
7692 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7696 for (i
= 0; i
< vec_num
; i
++)
7698 tree final_mask
= NULL_TREE
;
7700 && memory_access_type
!= VMAT_INVARIANT
)
7701 final_mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
7702 vectype
, vec_num
* j
+ i
);
7704 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7708 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7711 /* 2. Create the vector-load in the loop. */
7712 switch (alignment_support_scheme
)
7715 case dr_unaligned_supported
:
7717 unsigned int align
, misalign
;
7719 align
= DR_TARGET_ALIGNMENT (dr
);
7720 if (alignment_support_scheme
== dr_aligned
)
7722 gcc_assert (aligned_access_p (first_dr
));
7725 else if (DR_MISALIGNMENT (first_dr
) == -1)
7727 align
= dr_alignment (vect_dr_behavior (first_dr
));
7731 misalign
= DR_MISALIGNMENT (first_dr
);
7732 if (dataref_offset
== NULL_TREE
7733 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7734 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7739 align
= least_bit_hwi (misalign
| align
);
7740 tree ptr
= build_int_cst (ref_type
, align
);
7742 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
7745 gimple_call_set_nothrow (call
, true);
7747 data_ref
= NULL_TREE
;
7752 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7755 : build_int_cst (ref_type
, 0));
7756 if (alignment_support_scheme
== dr_aligned
)
7758 else if (DR_MISALIGNMENT (first_dr
) == -1)
7759 TREE_TYPE (data_ref
)
7760 = build_aligned_type (TREE_TYPE (data_ref
),
7761 align
* BITS_PER_UNIT
);
7763 TREE_TYPE (data_ref
)
7764 = build_aligned_type (TREE_TYPE (data_ref
),
7765 TYPE_ALIGN (elem_type
));
7769 case dr_explicit_realign
:
7773 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7775 if (compute_in_loop
)
7776 msq
= vect_setup_realignment (first_stmt
, gsi
,
7778 dr_explicit_realign
,
7781 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7782 ptr
= copy_ssa_name (dataref_ptr
);
7784 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7785 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7786 new_stmt
= gimple_build_assign
7787 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7789 (TREE_TYPE (dataref_ptr
),
7790 -(HOST_WIDE_INT
) align
));
7791 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7793 = build2 (MEM_REF
, vectype
, ptr
,
7794 build_int_cst (ref_type
, 0));
7795 vec_dest
= vect_create_destination_var (scalar_dest
,
7797 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7798 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7799 gimple_assign_set_lhs (new_stmt
, new_temp
);
7800 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7801 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7802 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7805 bump
= size_binop (MULT_EXPR
, vs
,
7806 TYPE_SIZE_UNIT (elem_type
));
7807 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7808 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7809 new_stmt
= gimple_build_assign
7810 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7812 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7813 ptr
= copy_ssa_name (ptr
, new_stmt
);
7814 gimple_assign_set_lhs (new_stmt
, ptr
);
7815 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7817 = build2 (MEM_REF
, vectype
, ptr
,
7818 build_int_cst (ref_type
, 0));
7821 case dr_explicit_realign_optimized
:
7823 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7824 new_temp
= copy_ssa_name (dataref_ptr
);
7826 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7827 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7828 new_stmt
= gimple_build_assign
7829 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7830 build_int_cst (TREE_TYPE (dataref_ptr
),
7831 -(HOST_WIDE_INT
) align
));
7832 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7834 = build2 (MEM_REF
, vectype
, new_temp
,
7835 build_int_cst (ref_type
, 0));
7841 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7842 /* DATA_REF is null if we've already built the statement. */
7844 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7845 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7846 gimple_set_lhs (new_stmt
, new_temp
);
7847 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7849 /* 3. Handle explicit realignment if necessary/supported.
7851 vec_dest = realign_load (msq, lsq, realignment_token) */
7852 if (alignment_support_scheme
== dr_explicit_realign_optimized
7853 || alignment_support_scheme
== dr_explicit_realign
)
7855 lsq
= gimple_assign_lhs (new_stmt
);
7856 if (!realignment_token
)
7857 realignment_token
= dataref_ptr
;
7858 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7859 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7860 msq
, lsq
, realignment_token
);
7861 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7862 gimple_assign_set_lhs (new_stmt
, new_temp
);
7863 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7865 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7868 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7869 add_phi_arg (phi
, lsq
,
7870 loop_latch_edge (containing_loop
),
7876 /* 4. Handle invariant-load. */
7877 if (inv_p
&& !bb_vinfo
)
7879 gcc_assert (!grouped_load
);
7880 /* If we have versioned for aliasing or the loop doesn't
7881 have any data dependencies that would preclude this,
7882 then we are sure this is a loop invariant load and
7883 thus we can insert it on the preheader edge. */
7884 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7885 && !nested_in_vect_loop
7886 && hoist_defs_of_uses (stmt
, loop
))
7888 if (dump_enabled_p ())
7890 dump_printf_loc (MSG_NOTE
, vect_location
,
7891 "hoisting out of the vectorized "
7893 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7895 tree tem
= copy_ssa_name (scalar_dest
);
7896 gsi_insert_on_edge_immediate
7897 (loop_preheader_edge (loop
),
7898 gimple_build_assign (tem
,
7900 (gimple_assign_rhs1 (stmt
))));
7901 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7902 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7903 set_vinfo_for_stmt (new_stmt
,
7904 new_stmt_vec_info (new_stmt
, vinfo
));
7908 gimple_stmt_iterator gsi2
= *gsi
;
7910 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7912 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7916 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7918 tree perm_mask
= perm_mask_for_reverse (vectype
);
7919 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7920 perm_mask
, stmt
, gsi
);
7921 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7924 /* Collect vector loads and later create their permutation in
7925 vect_transform_grouped_load (). */
7926 if (grouped_load
|| slp_perm
)
7927 dr_chain
.quick_push (new_temp
);
7929 /* Store vector loads in the corresponding SLP_NODE. */
7930 if (slp
&& !slp_perm
)
7931 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7933 /* With SLP permutation we load the gaps as well, without
7934 we need to skip the gaps after we manage to fully load
7935 all elements. group_gap_adj is GROUP_SIZE here. */
7936 group_elt
+= nunits
;
7937 if (maybe_ne (group_gap_adj
, 0U)
7939 && known_eq (group_elt
, group_size
- group_gap_adj
))
7941 poly_wide_int bump_val
7942 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7944 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7945 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7950 /* Bump the vector pointer to account for a gap or for excess
7951 elements loaded for a permuted SLP load. */
7952 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
7954 poly_wide_int bump_val
7955 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7957 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7958 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7963 if (slp
&& !slp_perm
)
7969 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7970 slp_node_instance
, false,
7973 dr_chain
.release ();
7981 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7982 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7983 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7988 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7990 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7991 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7994 dr_chain
.release ();
8000 /* Function vect_is_simple_cond.
8003 LOOP - the loop that is being vectorized.
8004 COND - Condition that is checked for simple use.
8007 *COMP_VECTYPE - the vector type for the comparison.
8008 *DTS - The def types for the arguments of the comparison
8010 Returns whether a COND can be vectorized. Checks whether
8011 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): extraction-damaged fragment — the embedded original line
   numbers (8014…8081) jump, so interior statements (returns, braces) are
   missing.  Visible logic: validate a COND operand for vectorization via
   vect_is_simple_use on its SSA_NAME / comparison operands, record def
   types in dts[], and derive *comp_vectype (widening an invariant integral
   comparison to match vectype where possible).  Do not edit further
   without the original source.  */
8014 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8015 tree
*comp_vectype
, enum vect_def_type
*dts
,
8019 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8022 if (TREE_CODE (cond
) == SSA_NAME
8023 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8025 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
8026 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
8027 &dts
[0], comp_vectype
)
8029 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8034 if (!COMPARISON_CLASS_P (cond
))
8037 lhs
= TREE_OPERAND (cond
, 0);
8038 rhs
= TREE_OPERAND (cond
, 1);
8040 if (TREE_CODE (lhs
) == SSA_NAME
)
8042 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
8043 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
8046 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8047 || TREE_CODE (lhs
) == FIXED_CST
)
8048 dts
[0] = vect_constant_def
;
8052 if (TREE_CODE (rhs
) == SSA_NAME
)
8054 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
8055 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
8058 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8059 || TREE_CODE (rhs
) == FIXED_CST
)
8060 dts
[1] = vect_constant_def
;
/* NOTE(review): both operand vectypes must have the same number of
   subparts to be comparable (maybe_ne check below).  */
8064 if (vectype1
&& vectype2
8065 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8066 TYPE_VECTOR_SUBPARTS (vectype2
)))
8069 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8070 /* Invariant comparison. */
8071 if (! *comp_vectype
)
8073 tree scalar_type
= TREE_TYPE (lhs
);
8074 /* If we can widen the comparison to match vectype do so. */
8075 if (INTEGRAL_TYPE_P (scalar_type
)
8076 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8077 TYPE_SIZE (TREE_TYPE (vectype
))))
8078 scalar_type
= build_nonstandard_integer_type
8079 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8080 TYPE_UNSIGNED (scalar_type
));
8081 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
8087 /* vectorizable_condition.
8089 Check if STMT is conditional modify expression that can be vectorized.
8090 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8091 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8094 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8095 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8096 else clause if it is 2).
8098 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged fragment of vectorizable_condition —
   the embedded line numbers (8101…8490) have large gaps, so many
   statements, braces and the switch over cond_code are missing.  Visible
   structure: validate a COND_EXPR assignment, choose bitop1/bitop2 for
   boolean-vector masks, then (transform phase) build VEC_COND_EXPR
   statements per copy, with a special path for EXTRACT_LAST_REDUCTION
   using IFN_FOLD_EXTRACT_LAST.  The token ">emp" below is garbled residue
   (presumably "&gtemp" — TODO confirm against the original source).
   Do not edit further without the original source.  */
8101 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8102 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
8105 tree scalar_dest
= NULL_TREE
;
8106 tree vec_dest
= NULL_TREE
;
8107 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8108 tree then_clause
, else_clause
;
8109 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8110 tree comp_vectype
= NULL_TREE
;
8111 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8112 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8115 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8116 enum vect_def_type dts
[4]
8117 = {vect_unknown_def_type
, vect_unknown_def_type
,
8118 vect_unknown_def_type
, vect_unknown_def_type
};
8121 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8122 stmt_vec_info prev_stmt_info
= NULL
;
8124 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8125 vec
<tree
> vec_oprnds0
= vNULL
;
8126 vec
<tree
> vec_oprnds1
= vNULL
;
8127 vec
<tree
> vec_oprnds2
= vNULL
;
8128 vec
<tree
> vec_oprnds3
= vNULL
;
8130 bool masked
= false;
8132 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
8135 vect_reduction_type reduction_type
8136 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
8137 if (reduction_type
== TREE_CODE_REDUCTION
)
8139 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8142 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8143 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8147 /* FORNOW: not yet supported. */
8148 if (STMT_VINFO_LIVE_P (stmt_info
))
8150 if (dump_enabled_p ())
8151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8152 "value used after loop.\n");
8157 /* Is vectorizable conditional operation? */
8158 if (!is_gimple_assign (stmt
))
8161 code
= gimple_assign_rhs_code (stmt
);
8163 if (code
!= COND_EXPR
)
8166 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8167 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8172 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8174 gcc_assert (ncopies
>= 1);
8175 if (reduc_index
&& ncopies
> 1)
8176 return false; /* FORNOW */
8178 cond_expr
= gimple_assign_rhs1 (stmt
);
8179 then_clause
= gimple_assign_rhs2 (stmt
);
8180 else_clause
= gimple_assign_rhs3 (stmt
);
8182 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8183 &comp_vectype
, &dts
[0], vectype
)
8188 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
8191 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
8195 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8198 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8201 masked
= !COMPARISON_CLASS_P (cond_expr
);
8202 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8204 if (vec_cmp_type
== NULL_TREE
)
8207 cond_code
= TREE_CODE (cond_expr
);
8210 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8211 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8214 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8216 /* Boolean values may have another representation in vectors
8217 and therefore we prefer bit operations over comparison for
8218 them (which also works for scalar masks). We store opcodes
8219 to use in bitop1 and bitop2. Statement is vectorized as
8220 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8221 depending on bitop1 and bitop2 arity. */
/* NOTE(review): the switch (cond_code) framing for the cases below was
   lost in extraction; the bitop1/bitop2 assignments correspond to the
   comparison-code cases (GT/GE/LT/LE/NE/EQ — TODO confirm).  */
8225 bitop1
= BIT_NOT_EXPR
;
8226 bitop2
= BIT_AND_EXPR
;
8229 bitop1
= BIT_NOT_EXPR
;
8230 bitop2
= BIT_IOR_EXPR
;
8233 bitop1
= BIT_NOT_EXPR
;
8234 bitop2
= BIT_AND_EXPR
;
8235 std::swap (cond_expr0
, cond_expr1
);
8238 bitop1
= BIT_NOT_EXPR
;
8239 bitop2
= BIT_IOR_EXPR
;
8240 std::swap (cond_expr0
, cond_expr1
);
8243 bitop1
= BIT_XOR_EXPR
;
8246 bitop1
= BIT_XOR_EXPR
;
8247 bitop2
= BIT_NOT_EXPR
;
8252 cond_code
= SSA_NAME
;
8257 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8258 if (bitop1
!= NOP_EXPR
)
8260 machine_mode mode
= TYPE_MODE (comp_vectype
);
8263 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8264 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8267 if (bitop2
!= NOP_EXPR
)
8269 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8271 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8275 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8278 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
8288 vec_oprnds0
.create (1);
8289 vec_oprnds1
.create (1);
8290 vec_oprnds2
.create (1);
8291 vec_oprnds3
.create (1);
8295 scalar_dest
= gimple_assign_lhs (stmt
);
8296 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
8297 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8299 /* Handle cond expr. */
8300 for (j
= 0; j
< ncopies
; j
++)
8302 gimple
*new_stmt
= NULL
;
8307 auto_vec
<tree
, 4> ops
;
8308 auto_vec
<vec
<tree
>, 4> vec_defs
;
8311 ops
.safe_push (cond_expr
);
8314 ops
.safe_push (cond_expr0
);
8315 ops
.safe_push (cond_expr1
);
8317 ops
.safe_push (then_clause
);
8318 ops
.safe_push (else_clause
);
8319 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8320 vec_oprnds3
= vec_defs
.pop ();
8321 vec_oprnds2
= vec_defs
.pop ();
8323 vec_oprnds1
= vec_defs
.pop ();
8324 vec_oprnds0
= vec_defs
.pop ();
8332 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8334 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8340 = vect_get_vec_def_for_operand (cond_expr0
,
8341 stmt
, comp_vectype
);
8342 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8345 = vect_get_vec_def_for_operand (cond_expr1
,
8346 stmt
, comp_vectype
);
8347 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8349 if (reduc_index
== 1)
8350 vec_then_clause
= reduc_def
;
8353 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8355 vect_is_simple_use (then_clause
, loop_vinfo
,
8358 if (reduc_index
== 2)
8359 vec_else_clause
= reduc_def
;
8362 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8364 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8371 = vect_get_vec_def_for_stmt_copy (dts
[0],
8372 vec_oprnds0
.pop ());
8375 = vect_get_vec_def_for_stmt_copy (dts
[1],
8376 vec_oprnds1
.pop ());
8378 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8379 vec_oprnds2
.pop ());
8380 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8381 vec_oprnds3
.pop ());
8386 vec_oprnds0
.quick_push (vec_cond_lhs
);
8388 vec_oprnds1
.quick_push (vec_cond_rhs
);
8389 vec_oprnds2
.quick_push (vec_then_clause
);
8390 vec_oprnds3
.quick_push (vec_else_clause
);
8393 /* Arguments are ready. Create the new vector stmt. */
8394 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8396 vec_then_clause
= vec_oprnds2
[i
];
8397 vec_else_clause
= vec_oprnds3
[i
];
8400 vec_compare
= vec_cond_lhs
;
8403 vec_cond_rhs
= vec_oprnds1
[i
];
8404 if (bitop1
== NOP_EXPR
)
8405 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8406 vec_cond_lhs
, vec_cond_rhs
);
8409 new_temp
= make_ssa_name (vec_cmp_type
);
8410 if (bitop1
== BIT_NOT_EXPR
)
8411 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8415 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8417 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8418 if (bitop2
== NOP_EXPR
)
8419 vec_compare
= new_temp
;
8420 else if (bitop2
== BIT_NOT_EXPR
)
8422 /* Instead of doing ~x ? y : z do x ? z : y. */
8423 vec_compare
= new_temp
;
8424 std::swap (vec_then_clause
, vec_else_clause
);
8428 vec_compare
= make_ssa_name (vec_cmp_type
);
8430 = gimple_build_assign (vec_compare
, bitop2
,
8431 vec_cond_lhs
, new_temp
);
8432 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8436 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
8438 if (!is_gimple_val (vec_compare
))
8440 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
8441 new_stmt
= gimple_build_assign (vec_compare_name
,
8443 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8444 vec_compare
= vec_compare_name
;
8446 gcc_assert (reduc_index
== 2);
8447 new_stmt
= gimple_build_call_internal
8448 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
8450 gimple_call_set_lhs (new_stmt
, scalar_dest
);
8451 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
8452 if (stmt
== gsi_stmt (*gsi
))
8453 vect_finish_replace_stmt (stmt
, new_stmt
);
8456 /* In this case we're moving the definition to later in the
8457 block. That doesn't matter because the only uses of the
8458 lhs are in phi statements. */
8459 gimple_stmt_iterator old_gsi
= gsi_for_stmt (stmt
);
8460 gsi_remove (&old_gsi
, true);
8461 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8466 new_temp
= make_ssa_name (vec_dest
);
8467 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8468 vec_compare
, vec_then_clause
,
8470 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8473 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8480 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8482 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8484 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8487 vec_oprnds0
.release ();
8488 vec_oprnds1
.release ();
8489 vec_oprnds2
.release ();
8490 vec_oprnds3
.release ();
8495 /* vectorizable_comparison.
8497 Check if STMT is comparison expression that can be vectorized.
8498 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8499 comparison, put it in VEC_STMT, and insert it at GSI.
8501 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged fragment of vectorizable_comparison —
   embedded line numbers (8504…8760) have gaps; returns and braces are
   missing.  Visible structure: validate a tcc_comparison assignment whose
   result vectype is a boolean vector, pick bitop1/bitop2 for boolean
   operands (GT -> ~a & b, GE -> ~a | b, LT/LE swap operands, EQ/NE via
   XOR), check optab support, then emit the vector compare/bit-op
   statements per copy.  Do not edit further without the original
   source.  */
8504 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8505 gimple
**vec_stmt
, tree reduc_def
,
8508 tree lhs
, rhs1
, rhs2
;
8509 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8510 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8511 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8512 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8514 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8515 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8519 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8520 stmt_vec_info prev_stmt_info
= NULL
;
8522 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8523 vec
<tree
> vec_oprnds0
= vNULL
;
8524 vec
<tree
> vec_oprnds1
= vNULL
;
8529 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8532 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8535 mask_type
= vectype
;
8536 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8541 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8543 gcc_assert (ncopies
>= 1);
8544 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8545 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8549 if (STMT_VINFO_LIVE_P (stmt_info
))
8551 if (dump_enabled_p ())
8552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8553 "value used after loop.\n");
8557 if (!is_gimple_assign (stmt
))
8560 code
= gimple_assign_rhs_code (stmt
);
8562 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8565 rhs1
= gimple_assign_rhs1 (stmt
);
8566 rhs2
= gimple_assign_rhs2 (stmt
);
8568 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8569 &dts
[0], &vectype1
))
8572 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8573 &dts
[1], &vectype2
))
8576 if (vectype1
&& vectype2
8577 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8578 TYPE_VECTOR_SUBPARTS (vectype2
)))
8581 vectype
= vectype1
? vectype1
: vectype2
;
8583 /* Invariant comparison. */
8586 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8587 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
8590 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
8593 /* Can't compare mask and non-mask types. */
8594 if (vectype1
&& vectype2
8595 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8598 /* Boolean values may have another representation in vectors
8599 and therefore we prefer bit operations over comparison for
8600 them (which also works for scalar masks). We store opcodes
8601 to use in bitop1 and bitop2. Statement is vectorized as
8602 BITOP2 (rhs1 BITOP1 rhs2) or
8603 rhs1 BITOP2 (BITOP1 rhs2)
8604 depending on bitop1 and bitop2 arity. */
8605 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
8607 if (code
== GT_EXPR
)
8609 bitop1
= BIT_NOT_EXPR
;
8610 bitop2
= BIT_AND_EXPR
;
8612 else if (code
== GE_EXPR
)
8614 bitop1
= BIT_NOT_EXPR
;
8615 bitop2
= BIT_IOR_EXPR
;
8617 else if (code
== LT_EXPR
)
8619 bitop1
= BIT_NOT_EXPR
;
8620 bitop2
= BIT_AND_EXPR
;
8621 std::swap (rhs1
, rhs2
);
8622 std::swap (dts
[0], dts
[1]);
8624 else if (code
== LE_EXPR
)
8626 bitop1
= BIT_NOT_EXPR
;
8627 bitop2
= BIT_IOR_EXPR
;
8628 std::swap (rhs1
, rhs2
);
8629 std::swap (dts
[0], dts
[1]);
8633 bitop1
= BIT_XOR_EXPR
;
8634 if (code
== EQ_EXPR
)
8635 bitop2
= BIT_NOT_EXPR
;
8641 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
8642 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
8643 dts
, ndts
, NULL
, NULL
);
8644 if (bitop1
== NOP_EXPR
)
8645 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
8648 machine_mode mode
= TYPE_MODE (vectype
);
8651 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
8652 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8655 if (bitop2
!= NOP_EXPR
)
8657 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8658 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8668 vec_oprnds0
.create (1);
8669 vec_oprnds1
.create (1);
8673 lhs
= gimple_assign_lhs (stmt
);
8674 mask
= vect_create_destination_var (lhs
, mask_type
);
8676 /* Handle cmp expr. */
8677 for (j
= 0; j
< ncopies
; j
++)
8679 gassign
*new_stmt
= NULL
;
8684 auto_vec
<tree
, 2> ops
;
8685 auto_vec
<vec
<tree
>, 2> vec_defs
;
8687 ops
.safe_push (rhs1
);
8688 ops
.safe_push (rhs2
);
8689 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8690 vec_oprnds1
= vec_defs
.pop ();
8691 vec_oprnds0
= vec_defs
.pop ();
8695 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8696 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8701 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8702 vec_oprnds0
.pop ());
8703 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8704 vec_oprnds1
.pop ());
8709 vec_oprnds0
.quick_push (vec_rhs1
);
8710 vec_oprnds1
.quick_push (vec_rhs2
);
8713 /* Arguments are ready. Create the new vector stmt. */
8714 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8716 vec_rhs2
= vec_oprnds1
[i
];
8718 new_temp
= make_ssa_name (mask
);
8719 if (bitop1
== NOP_EXPR
)
8721 new_stmt
= gimple_build_assign (new_temp
, code
,
8722 vec_rhs1
, vec_rhs2
);
8723 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8727 if (bitop1
== BIT_NOT_EXPR
)
8728 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8730 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8732 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8733 if (bitop2
!= NOP_EXPR
)
8735 tree res
= make_ssa_name (mask
);
8736 if (bitop2
== BIT_NOT_EXPR
)
8737 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8739 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8741 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8745 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8752 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8754 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8756 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8759 vec_oprnds0
.release ();
8760 vec_oprnds1
.release ();
8765 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8766 can handle all live statements in the node. Otherwise return true
8767 if STMT is not live or if vectorizable_live_operation can handle it.
8768 GSI and VEC_STMT are as for vectorizable_live_operation. */
/* NOTE(review): extraction-damaged fragment — interior lines (return
   statements, braces) are missing.  Visible logic matches the comment
   above in the file: with an SLP node, iterate its scalar stmts and
   check vectorizable_live_operation for each live one; otherwise check
   STMT itself with slp_index -1.  Do not edit further without the
   original source.  */
8771 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8772 slp_tree slp_node
, gimple
**vec_stmt
)
8778 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8780 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8781 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8782 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8787 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
8788 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
))
8794 /* Make sure the statement is vectorizable. */
/* NOTE(review): extraction-damaged fragment of vect_analyze_stmt —
   embedded line numbers (8797…9007) have gaps; returns, braces and some
   conditions are missing.  Visible structure: reject volatile stmts,
   switch analysis to pattern stmts / pattern def-seq stmts where
   relevant, sanity-check the stmt's def type against its relevance,
   then try each vectorizable_* analysis routine (two variants of the
   `ok = (...)` chain — presumably loop vs. basic-block analysis, TODO
   confirm), and finally check live-stmt handling.  Do not edit further
   without the original source.  */
8797 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
8798 slp_instance node_instance
)
8800 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8801 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8802 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8804 gimple
*pattern_stmt
;
8805 gimple_seq pattern_def_seq
;
8807 if (dump_enabled_p ())
8809 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8810 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8813 if (gimple_has_volatile_ops (stmt
))
8815 if (dump_enabled_p ())
8816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8817 "not vectorized: stmt has volatile operands\n");
8822 /* Skip stmts that do not need to be vectorized. In loops this is expected
8824 - the COND_EXPR which is the loop exit condition
8825 - any LABEL_EXPRs in the loop
8826 - computations that are used only for array indexing or loop control.
8827 In basic blocks we only analyze statements that are a part of some SLP
8828 instance, therefore, all the statements are relevant.
8830 Pattern statement needs to be analyzed instead of the original statement
8831 if the original statement is not relevant. Otherwise, we analyze both
8832 statements. In basic blocks we are called from some SLP instance
8833 traversal, don't analyze pattern stmts instead, the pattern stmts
8834 already will be part of SLP instance. */
8836 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8837 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8838 && !STMT_VINFO_LIVE_P (stmt_info
))
8840 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8842 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8843 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8845 /* Analyze PATTERN_STMT instead of the original stmt. */
8846 stmt
= pattern_stmt
;
8847 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8848 if (dump_enabled_p ())
8850 dump_printf_loc (MSG_NOTE
, vect_location
,
8851 "==> examining pattern statement: ");
8852 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8857 if (dump_enabled_p ())
8858 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8863 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8866 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8867 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8869 /* Analyze PATTERN_STMT too. */
8870 if (dump_enabled_p ())
8872 dump_printf_loc (MSG_NOTE
, vect_location
,
8873 "==> examining pattern statement: ");
8874 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8877 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
8882 if (is_pattern_stmt_p (stmt_info
)
8884 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8886 gimple_stmt_iterator si
;
8888 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8890 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8891 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8892 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8894 /* Analyze def stmt of STMT if it's a pattern stmt. */
8895 if (dump_enabled_p ())
8897 dump_printf_loc (MSG_NOTE
, vect_location
,
8898 "==> examining pattern def statement: ");
8899 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8902 if (!vect_analyze_stmt (pattern_def_stmt
,
8903 need_to_vectorize
, node
, node_instance
))
8909 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8911 case vect_internal_def
:
8914 case vect_reduction_def
:
8915 case vect_nested_cycle
:
8916 gcc_assert (!bb_vinfo
8917 && (relevance
== vect_used_in_outer
8918 || relevance
== vect_used_in_outer_by_reduction
8919 || relevance
== vect_used_by_reduction
8920 || relevance
== vect_unused_in_scope
8921 || relevance
== vect_used_only_live
));
8924 case vect_induction_def
:
8925 gcc_assert (!bb_vinfo
);
8928 case vect_constant_def
:
8929 case vect_external_def
:
8930 case vect_unknown_def_type
:
8935 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8937 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8938 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8939 || (is_gimple_call (stmt
)
8940 && gimple_call_lhs (stmt
) == NULL_TREE
));
8941 *need_to_vectorize
= true;
8944 if (PURE_SLP_STMT (stmt_info
) && !node
)
8946 dump_printf_loc (MSG_NOTE
, vect_location
,
8947 "handled only by SLP analysis\n");
8953 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8954 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8955 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8956 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8957 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8958 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8959 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8960 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8961 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8962 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8963 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
)
8964 || vectorizable_induction (stmt
, NULL
, NULL
, node
)
8965 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8966 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8970 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8971 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8972 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8973 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8974 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8975 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8976 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8977 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8978 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8979 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8984 if (dump_enabled_p ())
8986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8987 "not vectorized: relevant stmt not ");
8988 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8989 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8998 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8999 need extra handling, except for vectorizable reductions. */
9000 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9001 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
))
9003 if (dump_enabled_p ())
9005 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9006 "not vectorized: live stmt not supported: ");
9007 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
9017 /* Function vect_transform_stmt.
9019 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* NOTE(review): extraction-damaged fragment of vect_transform_stmt —
   embedded line numbers (9022…9177) have gaps; `break` statements and
   braces between the switch cases are missing.  Visible structure:
   dispatch on STMT_VINFO_TYPE to the matching vectorizable_* transform
   routine, record the vdef in the loop-exit phi for nested-loop stmts
   whose DEF is used in the outer loop, handle live stmts, and store the
   resulting vec_stmt in the stmt_info.  Do not edit further without the
   original source.  */
9022 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9023 bool *grouped_store
, slp_tree slp_node
,
9024 slp_instance slp_node_instance
)
9026 bool is_store
= false;
9027 gimple
*vec_stmt
= NULL
;
9028 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9031 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9032 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9034 switch (STMT_VINFO_TYPE (stmt_info
))
9036 case type_demotion_vec_info_type
:
9037 case type_promotion_vec_info_type
:
9038 case type_conversion_vec_info_type
:
9039 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
9043 case induc_vec_info_type
:
9044 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
);
9048 case shift_vec_info_type
:
9049 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
9053 case op_vec_info_type
:
9054 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
9058 case assignment_vec_info_type
:
9059 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
9063 case load_vec_info_type
:
9064 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
9069 case store_vec_info_type
:
9070 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
9072 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
9074 /* In case of interleaving, the whole chain is vectorized when the
9075 last store in the chain is reached. Store stmts before the last
9076 one are skipped, and there vec_stmt_info shouldn't be freed
9078 *grouped_store
= true;
9079 if (STMT_VINFO_VEC_STMT (stmt_info
))
9086 case condition_vec_info_type
:
9087 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
9091 case comparison_vec_info_type
:
9092 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
9096 case call_vec_info_type
:
9097 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
9098 stmt
= gsi_stmt (*gsi
);
9101 case call_simd_clone_vec_info_type
:
9102 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
9103 stmt
= gsi_stmt (*gsi
);
9106 case reduc_vec_info_type
:
9107 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
9113 if (!STMT_VINFO_LIVE_P (stmt_info
))
9115 if (dump_enabled_p ())
9116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9117 "stmt not supported.\n");
9122 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9123 This would break hybrid SLP vectorization. */
9125 gcc_assert (!vec_stmt
9126 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
9128 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9129 is being vectorized, but outside the immediately enclosing loop. */
9131 && STMT_VINFO_LOOP_VINFO (stmt_info
)
9132 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
9133 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
9134 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9135 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9136 || STMT_VINFO_RELEVANT (stmt_info
) ==
9137 vect_used_in_outer_by_reduction
))
9139 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9140 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9141 imm_use_iterator imm_iter
;
9142 use_operand_p use_p
;
9146 if (dump_enabled_p ())
9147 dump_printf_loc (MSG_NOTE
, vect_location
,
9148 "Record the vdef for outer-loop vectorization.\n");
9150 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9151 (to be used when vectorizing outer-loop stmts that use the DEF of
9153 if (gimple_code (stmt
) == GIMPLE_PHI
)
9154 scalar_dest
= PHI_RESULT (stmt
);
9156 scalar_dest
= gimple_assign_lhs (stmt
);
9158 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9160 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9162 exit_phi
= USE_STMT (use_p
);
9163 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
9168 /* Handle stmts whose DEF is used outside the loop-nest that is
9169 being vectorized. */
9170 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9172 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
);
9177 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
9183 /* Remove a group of stores (for SLP or interleaving), free their
/* NOTE(review): extraction-damaged fragment — the loop construct around
   the body (iteration over the group chain via GROUP_NEXT_ELEMENT) and
   its braces are missing.  Visible logic: walk a store group starting
   at FIRST_STMT, redirect pattern stmts to their related stmt, unlink
   and remove each stmt from the IL and free its stmt_vec_info.  Do not
   edit further without the original source.  */
9187 vect_remove_stores (gimple
*first_stmt
)
9189 gimple
*next
= first_stmt
;
9191 gimple_stmt_iterator next_si
;
9195 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
9197 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
9198 if (is_pattern_stmt_p (stmt_info
))
9199 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
9200 /* Free the attached stmt_vec_info and remove the stmt. */
9201 next_si
= gsi_for_stmt (next
);
9202 unlink_stmt_vdef (next
);
9203 gsi_remove (&next_si
, true);
9204 release_defs (next
);
9205 free_stmt_vec_info (next
);
9211 /* Function new_stmt_vec_info.
9213 Create and initialize a new stmt_vec_info struct for STMT. */
/* NOTE(review): extraction-damaged fragment — some lines (e.g. the
   declaration of `res` and the final return) are missing.  Visible
   logic: xcalloc a _stmt_vec_info and initialize every field to its
   default; loop-header PHIs get vect_unknown_def_type, everything else
   vect_internal_def.  Do not edit further without the original
   source.  */
9216 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
9219 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
9221 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
9222 STMT_VINFO_STMT (res
) = stmt
;
9224 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
9225 STMT_VINFO_LIVE_P (res
) = false;
9226 STMT_VINFO_VECTYPE (res
) = NULL
;
9227 STMT_VINFO_VEC_STMT (res
) = NULL
;
9228 STMT_VINFO_VECTORIZABLE (res
) = true;
9229 STMT_VINFO_IN_PATTERN_P (res
) = false;
9230 STMT_VINFO_RELATED_STMT (res
) = NULL
;
9231 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
9232 STMT_VINFO_DATA_REF (res
) = NULL
;
9233 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
9234 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
9236 if (gimple_code (stmt
) == GIMPLE_PHI
9237 && is_loop_header_bb_p (gimple_bb (stmt
)))
9238 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
9240 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
9242 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
9243 STMT_SLP_TYPE (res
) = loop_vect
;
9244 STMT_VINFO_NUM_SLP_USES (res
) = 0;
9246 GROUP_FIRST_ELEMENT (res
) = NULL
;
9247 GROUP_NEXT_ELEMENT (res
) = NULL
;
9248 GROUP_SIZE (res
) = 0;
9249 GROUP_STORE_COUNT (res
) = 0;
9250 GROUP_GAP (res
) = 0;
9251 GROUP_SAME_DR_STMT (res
) = NULL
;
9257 /* Create a hash table for stmt_vec_info. */
9260 init_stmt_vec_info_vec (void)
9262 gcc_assert (!stmt_vec_info_vec
.exists ());
9263 stmt_vec_info_vec
.create (50);
9267 /* Free hash table for stmt_vec_info. */
9270 free_stmt_vec_info_vec (void)
9274 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
9276 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9277 gcc_assert (stmt_vec_info_vec
.exists ());
9278 stmt_vec_info_vec
.release ();
9282 /* Free stmt vectorization related info. */
9285 free_stmt_vec_info (gimple
*stmt
)
9287 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9292 /* Check if this statement has a related "pattern stmt"
9293 (introduced by the vectorizer during the pattern recognition
9294 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9296 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9298 stmt_vec_info patt_info
9299 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9302 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
9303 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9304 gimple_set_bb (patt_stmt
, NULL
);
9305 tree lhs
= gimple_get_lhs (patt_stmt
);
9306 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9307 release_ssa_name (lhs
);
9310 gimple_stmt_iterator si
;
9311 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
9313 gimple
*seq_stmt
= gsi_stmt (si
);
9314 gimple_set_bb (seq_stmt
, NULL
);
9315 lhs
= gimple_get_lhs (seq_stmt
);
9316 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9317 release_ssa_name (lhs
);
9318 free_stmt_vec_info (seq_stmt
);
9321 free_stmt_vec_info (patt_stmt
);
9325 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9326 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9327 set_vinfo_for_stmt (stmt
, NULL
);
9332 /* Function get_vectype_for_scalar_type_and_size.
9334 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9338 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9340 tree orig_scalar_type
= scalar_type
;
9341 scalar_mode inner_mode
;
9342 machine_mode simd_mode
;
9346 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9347 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9350 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9352 /* For vector types of elements whose mode precision doesn't
9353 match their types precision we use a element type of mode
9354 precision. The vectorization routines will have to make sure
9355 they support the proper result truncation/extension.
9356 We also make sure to build vector types with INTEGER_TYPE
9357 component type only. */
9358 if (INTEGRAL_TYPE_P (scalar_type
)
9359 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9360 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9361 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9362 TYPE_UNSIGNED (scalar_type
));
9364 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9365 When the component mode passes the above test simply use a type
9366 corresponding to that mode. The theory is that any use that
9367 would cause problems with this will disable vectorization anyway. */
9368 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9369 && !INTEGRAL_TYPE_P (scalar_type
))
9370 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9372 /* We can't build a vector type of elements with alignment bigger than
9374 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9375 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9376 TYPE_UNSIGNED (scalar_type
));
9378 /* If we felt back to using the mode fail if there was
9379 no scalar type for it. */
9380 if (scalar_type
== NULL_TREE
)
9383 /* If no size was supplied use the mode the target prefers. Otherwise
9384 lookup a vector mode of the specified size. */
9385 if (known_eq (size
, 0U))
9386 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9387 else if (!multiple_p (size
, nbytes
, &nunits
)
9388 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9390 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9391 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9394 vectype
= build_vector_type (scalar_type
, nunits
);
9396 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9397 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9400 /* Re-attach the address-space qualifier if we canonicalized the scalar
9402 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9403 return build_qualified_type
9404 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9409 poly_uint64 current_vector_size
;
9411 /* Function get_vectype_for_scalar_type.
9413 Returns the vector type corresponding to SCALAR_TYPE as supported
9417 get_vectype_for_scalar_type (tree scalar_type
)
9420 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9421 current_vector_size
);
9423 && known_eq (current_vector_size
, 0U))
9424 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9428 /* Function get_mask_type_for_scalar_type.
9430 Returns the mask type corresponding to a result of comparison
9431 of vectors of specified SCALAR_TYPE as supported by target. */
9434 get_mask_type_for_scalar_type (tree scalar_type
)
9436 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9441 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9442 current_vector_size
);
9445 /* Function get_same_sized_vectype
9447 Returns a vector type corresponding to SCALAR_TYPE of size
9448 VECTOR_TYPE if supported by the target. */
9451 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9453 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9454 return build_same_sized_truth_vector_type (vector_type
);
9456 return get_vectype_for_scalar_type_and_size
9457 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9460 /* Function vect_is_simple_use.
9463 VINFO - the vect info of the loop or basic block that is being vectorized.
9464 OPERAND - operand in the loop or bb.
9466 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9467 DT - the type of definition
9469 Returns whether a stmt with OPERAND can be vectorized.
9470 For loops, supportable operands are constants, loop invariants, and operands
9471 that are defined by the current iteration of the loop. Unsupportable
9472 operands are those that are defined by a previous iteration of the loop (as
9473 is the case in reduction/induction computations).
9474 For basic blocks, supportable operands are constants and bb invariants.
9475 For now, operands defined outside the basic block are not supported. */
9478 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9479 gimple
**def_stmt
, enum vect_def_type
*dt
)
9482 *dt
= vect_unknown_def_type
;
9484 if (dump_enabled_p ())
9486 dump_printf_loc (MSG_NOTE
, vect_location
,
9487 "vect_is_simple_use: operand ");
9488 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9489 dump_printf (MSG_NOTE
, "\n");
9492 if (CONSTANT_CLASS_P (operand
))
9494 *dt
= vect_constant_def
;
9498 if (is_gimple_min_invariant (operand
))
9500 *dt
= vect_external_def
;
9504 if (TREE_CODE (operand
) != SSA_NAME
)
9506 if (dump_enabled_p ())
9507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9512 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9514 *dt
= vect_external_def
;
9518 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9519 if (dump_enabled_p ())
9521 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9522 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
9525 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9526 *dt
= vect_external_def
;
9529 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9530 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9533 if (dump_enabled_p ())
9535 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9538 case vect_uninitialized_def
:
9539 dump_printf (MSG_NOTE
, "uninitialized\n");
9541 case vect_constant_def
:
9542 dump_printf (MSG_NOTE
, "constant\n");
9544 case vect_external_def
:
9545 dump_printf (MSG_NOTE
, "external\n");
9547 case vect_internal_def
:
9548 dump_printf (MSG_NOTE
, "internal\n");
9550 case vect_induction_def
:
9551 dump_printf (MSG_NOTE
, "induction\n");
9553 case vect_reduction_def
:
9554 dump_printf (MSG_NOTE
, "reduction\n");
9556 case vect_double_reduction_def
:
9557 dump_printf (MSG_NOTE
, "double reduction\n");
9559 case vect_nested_cycle
:
9560 dump_printf (MSG_NOTE
, "nested cycle\n");
9562 case vect_unknown_def_type
:
9563 dump_printf (MSG_NOTE
, "unknown\n");
9568 if (*dt
== vect_unknown_def_type
)
9570 if (dump_enabled_p ())
9571 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9572 "Unsupported pattern.\n");
9576 switch (gimple_code (*def_stmt
))
9583 if (dump_enabled_p ())
9584 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9585 "unsupported defining stmt:\n");
9592 /* Function vect_is_simple_use.
9594 Same as vect_is_simple_use but also determines the vector operand
9595 type of OPERAND and stores it to *VECTYPE. If the definition of
9596 OPERAND is vect_uninitialized_def, vect_constant_def or
9597 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9598 is responsible to compute the best suited vector type for the
9602 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9603 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
9605 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
9608 /* Now get a vector type if the def is internal, otherwise supply
9609 NULL_TREE and leave it up to the caller to figure out a proper
9610 type for the use stmt. */
9611 if (*dt
== vect_internal_def
9612 || *dt
== vect_induction_def
9613 || *dt
== vect_reduction_def
9614 || *dt
== vect_double_reduction_def
9615 || *dt
== vect_nested_cycle
)
9617 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
9619 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9620 && !STMT_VINFO_RELEVANT (stmt_info
)
9621 && !STMT_VINFO_LIVE_P (stmt_info
))
9622 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9624 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9625 gcc_assert (*vectype
!= NULL_TREE
);
9627 else if (*dt
== vect_uninitialized_def
9628 || *dt
== vect_constant_def
9629 || *dt
== vect_external_def
)
9630 *vectype
= NULL_TREE
;
9638 /* Function supportable_widening_operation
9640 Check whether an operation represented by the code CODE is a
9641 widening operation that is supported by the target platform in
9642 vector form (i.e., when operating on arguments of type VECTYPE_IN
9643 producing a result of type VECTYPE_OUT).
9645 Widening operations we currently support are NOP (CONVERT), FLOAT
9646 and WIDEN_MULT. This function checks if these operations are supported
9647 by the target platform either directly (via vector tree-codes), or via
9651 - CODE1 and CODE2 are codes of vector operations to be used when
9652 vectorizing the operation, if available.
9653 - MULTI_STEP_CVT determines the number of required intermediate steps in
9654 case of multi-step conversion (like char->short->int - in that case
9655 MULTI_STEP_CVT will be 1).
9656 - INTERM_TYPES contains the intermediate type required to perform the
9657 widening operation (short in the above example). */
9660 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9661 tree vectype_out
, tree vectype_in
,
9662 enum tree_code
*code1
, enum tree_code
*code2
,
9663 int *multi_step_cvt
,
9664 vec
<tree
> *interm_types
)
9666 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9667 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9668 struct loop
*vect_loop
= NULL
;
9669 machine_mode vec_mode
;
9670 enum insn_code icode1
, icode2
;
9671 optab optab1
, optab2
;
9672 tree vectype
= vectype_in
;
9673 tree wide_vectype
= vectype_out
;
9674 enum tree_code c1
, c2
;
9676 tree prev_type
, intermediate_type
;
9677 machine_mode intermediate_mode
, prev_mode
;
9678 optab optab3
, optab4
;
9680 *multi_step_cvt
= 0;
9682 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9686 case WIDEN_MULT_EXPR
:
9687 /* The result of a vectorized widening operation usually requires
9688 two vectors (because the widened results do not fit into one vector).
9689 The generated vector results would normally be expected to be
9690 generated in the same order as in the original scalar computation,
9691 i.e. if 8 results are generated in each vector iteration, they are
9692 to be organized as follows:
9693 vect1: [res1,res2,res3,res4],
9694 vect2: [res5,res6,res7,res8].
9696 However, in the special case that the result of the widening
9697 operation is used in a reduction computation only, the order doesn't
9698 matter (because when vectorizing a reduction we change the order of
9699 the computation). Some targets can take advantage of this and
9700 generate more efficient code. For example, targets like Altivec,
9701 that support widen_mult using a sequence of {mult_even,mult_odd}
9702 generate the following vectors:
9703 vect1: [res1,res3,res5,res7],
9704 vect2: [res2,res4,res6,res8].
9706 When vectorizing outer-loops, we execute the inner-loop sequentially
9707 (each vectorized inner-loop iteration contributes to VF outer-loop
9708 iterations in parallel). We therefore don't allow to change the
9709 order of the computation in the inner-loop during outer-loop
9711 /* TODO: Another case in which order doesn't *really* matter is when we
9712 widen and then contract again, e.g. (short)((int)x * y >> 8).
9713 Normally, pack_trunc performs an even/odd permute, whereas the
9714 repack from an even/odd expansion would be an interleave, which
9715 would be significantly simpler for e.g. AVX2. */
9716 /* In any case, in order to avoid duplicating the code below, recurse
9717 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9718 are properly set up for the caller. If we fail, we'll continue with
9719 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9721 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9722 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9723 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9724 stmt
, vectype_out
, vectype_in
,
9725 code1
, code2
, multi_step_cvt
,
9728 /* Elements in a vector with vect_used_by_reduction property cannot
9729 be reordered if the use chain with this property does not have the
9730 same operation. One such an example is s += a * b, where elements
9731 in a and b cannot be reordered. Here we check if the vector defined
9732 by STMT is only directly used in the reduction statement. */
9733 tree lhs
= gimple_assign_lhs (stmt
);
9734 use_operand_p dummy
;
9736 stmt_vec_info use_stmt_info
= NULL
;
9737 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9738 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9739 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9742 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9743 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9756 case VEC_WIDEN_MULT_EVEN_EXPR
:
9757 /* Support the recursion induced just above. */
9758 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9759 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9762 case WIDEN_LSHIFT_EXPR
:
9763 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9764 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9768 c1
= VEC_UNPACK_LO_EXPR
;
9769 c2
= VEC_UNPACK_HI_EXPR
;
9773 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9774 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9777 case FIX_TRUNC_EXPR
:
9778 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9779 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9780 computing the operation. */
9787 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9790 if (code
== FIX_TRUNC_EXPR
)
9792 /* The signedness is determined from output operand. */
9793 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9794 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9798 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9799 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9802 if (!optab1
|| !optab2
)
9805 vec_mode
= TYPE_MODE (vectype
);
9806 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9807 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
9813 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9814 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9815 /* For scalar masks we may have different boolean
9816 vector types having the same QImode. Thus we
9817 add additional check for elements number. */
9818 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9819 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
9820 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
9822 /* Check if it's a multi-step conversion that can be done using intermediate
9825 prev_type
= vectype
;
9826 prev_mode
= vec_mode
;
9828 if (!CONVERT_EXPR_CODE_P (code
))
9831 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9832 intermediate steps in promotion sequence. We try
9833 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9835 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9836 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9838 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9839 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9841 intermediate_type
= vect_halve_mask_nunits (prev_type
);
9842 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9847 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9848 TYPE_UNSIGNED (prev_type
));
9850 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9851 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9853 if (!optab3
|| !optab4
9854 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9855 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9856 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9857 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9858 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9859 == CODE_FOR_nothing
)
9860 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9861 == CODE_FOR_nothing
))
9864 interm_types
->quick_push (intermediate_type
);
9865 (*multi_step_cvt
)++;
9867 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9868 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9869 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9870 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
9871 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
9873 prev_type
= intermediate_type
;
9874 prev_mode
= intermediate_mode
;
9877 interm_types
->release ();
9882 /* Function supportable_narrowing_operation
9884 Check whether an operation represented by the code CODE is a
9885 narrowing operation that is supported by the target platform in
9886 vector form (i.e., when operating on arguments of type VECTYPE_IN
9887 and producing a result of type VECTYPE_OUT).
9889 Narrowing operations we currently support are NOP (CONVERT) and
9890 FIX_TRUNC. This function checks if these operations are supported by
9891 the target platform directly via vector tree-codes.
9894 - CODE1 is the code of a vector operation to be used when
9895 vectorizing the operation, if available.
9896 - MULTI_STEP_CVT determines the number of required intermediate steps in
9897 case of multi-step conversion (like int->short->char - in that case
9898 MULTI_STEP_CVT will be 1).
9899 - INTERM_TYPES contains the intermediate type required to perform the
9900 narrowing operation (short in the above example). */
9903 supportable_narrowing_operation (enum tree_code code
,
9904 tree vectype_out
, tree vectype_in
,
9905 enum tree_code
*code1
, int *multi_step_cvt
,
9906 vec
<tree
> *interm_types
)
9908 machine_mode vec_mode
;
9909 enum insn_code icode1
;
9910 optab optab1
, interm_optab
;
9911 tree vectype
= vectype_in
;
9912 tree narrow_vectype
= vectype_out
;
9914 tree intermediate_type
, prev_type
;
9915 machine_mode intermediate_mode
, prev_mode
;
9919 *multi_step_cvt
= 0;
9923 c1
= VEC_PACK_TRUNC_EXPR
;
9926 case FIX_TRUNC_EXPR
:
9927 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9931 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9932 tree code and optabs used for computing the operation. */
9939 if (code
== FIX_TRUNC_EXPR
)
9940 /* The signedness is determined from output operand. */
9941 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9943 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9948 vec_mode
= TYPE_MODE (vectype
);
9949 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9954 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9955 /* For scalar masks we may have different boolean
9956 vector types having the same QImode. Thus we
9957 add additional check for elements number. */
9958 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9959 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
9960 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9962 /* Check if it's a multi-step conversion that can be done using intermediate
9964 prev_mode
= vec_mode
;
9965 prev_type
= vectype
;
9966 if (code
== FIX_TRUNC_EXPR
)
9967 uns
= TYPE_UNSIGNED (vectype_out
);
9969 uns
= TYPE_UNSIGNED (vectype
);
9971 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9972 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9973 costly than signed. */
9974 if (code
== FIX_TRUNC_EXPR
&& uns
)
9976 enum insn_code icode2
;
9979 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9981 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9982 if (interm_optab
!= unknown_optab
9983 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9984 && insn_data
[icode1
].operand
[0].mode
9985 == insn_data
[icode2
].operand
[0].mode
)
9988 optab1
= interm_optab
;
9993 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9994 intermediate steps in promotion sequence. We try
9995 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9996 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9997 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9999 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10000 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10002 intermediate_type
= vect_double_mask_nunits (prev_type
);
10003 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10008 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
10010 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
10013 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
10014 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10015 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
10016 == CODE_FOR_nothing
))
10019 interm_types
->quick_push (intermediate_type
);
10020 (*multi_step_cvt
)++;
10022 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10023 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10024 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
10025 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10027 prev_mode
= intermediate_mode
;
10028 prev_type
= intermediate_type
;
10029 optab1
= interm_optab
;
10032 interm_types
->release ();
10036 /* Generate and return a statement that sets vector mask MASK such that
10037 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10040 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
10042 tree cmp_type
= TREE_TYPE (start_index
);
10043 tree mask_type
= TREE_TYPE (mask
);
10044 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
10045 cmp_type
, mask_type
,
10046 OPTIMIZE_FOR_SPEED
));
10047 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
10048 start_index
, end_index
,
10049 build_zero_cst (mask_type
));
10050 gimple_call_set_lhs (call
, mask
);
10054 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10055 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10058 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
10061 tree tmp
= make_ssa_name (mask_type
);
10062 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
10063 gimple_seq_add_stmt (seq
, call
);
10064 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);