/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
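
/* For illustration, consider the outer-loop vectorization shape this
   predicate distinguishes (a sketch with invented names, not taken
   from any particular testcase):

       for (i = 0; i < n; i++)      <-- loop being vectorized (outer)
         for (j = 0; j < m; j++)    <-- loop->inner
           s += a[i][j];            <-- stmt_in_inner_loop_p () is true

   Statements that sit directly in the outer loop body return false.  */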
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}
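
/* Illustrative use (hypothetical values, for exposition only): during
   analysis a caller might record one unaligned vector store with a
   known misalignment of 4 bytes in the loop body:

     record_stmt_cost (&body_cost_vec, 1, unaligned_store,
		       stmt_info, 4, vect_body);

   The entry is queued in BODY_COST_VEC and only a preliminary estimate
   is returned; passing a null BODY_COST_VEC instead costs the statement
   immediately via add_stmt_cost.  */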
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
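
/* As a small illustration (hypothetical scalar loop):

       for (i = 0; i < n; i++)
	 {
	   t = a[i] + 1;	<-- relevant: feeds the store below
	   b[i] = t;		<-- relevant: has a vdef
	 }
       last = t;		<-- t is also used outside the loop
				    (through a loop-closed exit phi),
				    so its definition is marked live.

   A statement whose result is used by nothing but the exit condition
   would stay vect_unused_in_scope.  */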
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
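
/* For example, in the scalar statement "a[i] = x" the use of 'i' only
   feeds the address computation, so this predicate returns false for
   'i' and true for 'x'.  (Illustration only; the actual check works on
   the gimple forms shown above.)  */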
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  gimple *stmt;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
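
/* To illustrate the propagation (hypothetical loop): for

       for (i = 0; i < n; i++)
	 b[i] = a[i] + x;

   the store "b[i] = ..." is seeded as relevant because it has a vdef;
   processing its uses then marks the load and the addition, while the
   increment of 'i' is reached only through address computations and so
   stays unmarked.  */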
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
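
/* Worked example (unit costs assumed for exposition; real targets need
   not use them): for NCOPIES = 2 with one constant operand, the
   prologue gets one scalar_to_vec broadcast and the body two
   vector_stmt entries, so this would report inside_cost = 2 and
   prologue_cost = 1.  */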
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
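
/* Worked example (values for exposition only): a two-step promotion
   has PWR = 1, so the loop above counts vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 vec_promote_demote operations, while the corresponding
   two-step demotion counts vect_pow2 (0) + vect_pow2 (1) = 1 + 2.
   Each extra step doubles the count, matching the comment above.  */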
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
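
/* Worked example (numbers for exposition only): a contiguous-permute
   store group with GROUP_SIZE = 4 and NCOPIES = 1 costs
   ceil_log2 (4) * 4 = 8 vec_perm operations in addition to the stores
   themselves.  */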
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial value 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
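
/* For example (illustration only, with V4SI_type standing for the
   vector type in use), vectorizing "b[i] = a[i] + 5" might call

     vec_cst = vect_init_vector (stmt, build_int_cst (intSI_type_node, 5),
				 V4SI_type, NULL);

   which builds the vector constant { 5, 5, 5, 5 }, assigns it to a
   fresh "cst_" SSA name in the loop preheader and returns that name.  */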
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
1568 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1569 vec
<tree
> *vec_oprnds0
,
1570 vec
<tree
> *vec_oprnds1
,
1575 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1576 auto_vec
<tree
> ops (nops
);
1577 auto_vec
<vec
<tree
> > vec_defs (nops
);
1579 ops
.quick_push (op0
);
1581 ops
.quick_push (op1
);
1583 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1585 *vec_oprnds0
= vec_defs
[0];
1587 *vec_oprnds1
= vec_defs
[1];
1593 vec_oprnds0
->create (1);
1594 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1595 vec_oprnds0
->quick_push (vec_oprnd
);
1599 vec_oprnds1
->create (1);
1600 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1601 vec_oprnds1
->quick_push (vec_oprnd
);
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT did.  */

static void
vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_replace (&gsi, vec_stmt, false);

  vect_finish_stmt_generation_1 (stmt, vec_stmt);
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (stmt, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}
static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			  vec_load_store_type vls_type, int group_size,
			  vect_memory_access_type memory_access_type)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because an access"
			 " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!(targetm.vectorize.get_mask_mode
	(GET_MODE_NUNITS (vecmode),
	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because the target"
			 " doesn't have the appropriate masked load or"
			 " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }
  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
  else
    gcc_unreachable ();
}
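
/* Worked example (values for exposition only): with GROUP_SIZE = 2, a
   vectorization factor of 4 and NUNITS = 4, the division above gives
   NVECTORS = 2, i.e. two masks of VECTYPE are recorded per iteration
   of the vector loop.  */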
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
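
/* For illustration (hypothetical SSA names): given VEC_MASK _5 and
   LOOP_MASK _6, this emits

     vec_mask_and_7 = _5 & _6;

   before *GSI and returns vec_mask_and_7, so the access is performed
   only where both the scalar condition and the loop mask allow it.  */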
/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
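
/* For example, for V4SI the reversing selector { 3, 2, 1, 0 } is
   described by the three leading elements { 3, 2, 1 } pushed above;
   for a variable-length vector the same stepped encoding represents
   { NUNITS - 1, NUNITS - 2, ... }.  */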
/* STMT is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (gimple *stmt)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      gcc_assert (ifn == IFN_MASK_STORE);
      return gimple_call_arg (stmt, 3);
    }
  gcc_unreachable ();
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
			   bool masked_p, vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
			   && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
			&& vls_type == VLS_LOAD
			&& loop_vinfo
			&& !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
	{
	  /* Try to use consecutive accesses of GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (multiple_p (nunits, group_size))
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
	  if (overrun_p
	      && gap < (vect_known_alignment_in_bytes (first_dr)
			/ vect_get_scalar_dr_size (first_dr)))
	    overrun_p = false;
	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  *memory_access_type = VMAT_CONTIGUOUS;
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
      if (would_overrun_p
	  && !masked_p
	  && gap < (vect_known_alignment_in_bytes (first_dr)
		    / vect_get_scalar_dr_size (first_dr)))
	would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (stmt) > 0)
	{
	  /* First cope with the degenerate case of a single-element
	     vector.  */
	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
	    *memory_access_type = VMAT_CONTIGUOUS;

	  /* Otherwise try using LOAD/STORE_LANES.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
		  : vect_store_lanes_supported (vectype, group_size,
						masked_p)))
	    {
	      *memory_access_type = VMAT_LOAD_STORE_LANES;
	      overrun_p = would_overrun_p;
	    }

	  /* If that fails, try using permuting loads.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_grouped_load_supported (vectype, single_element_p,
						 group_size)
		  : vect_grouped_store_supported (vectype, group_size)))
	    {
	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
	      overrun_p = would_overrun_p;
	    }
	}
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
	{
	  tree op = vect_get_store_rhs (next_stmt);
	  gimple *def_stmt;
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
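
/* As a worked illustration of the alignment-based overrun checks above
   (hypothetical numbers): with a known alignment of 16 bytes and 4-byte
   scalar elements the quotient is 4, so a trailing gap of up to 3
   elements is harmless; every vector access stays inside a 16-byte
   block that contains at least one real element.  */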
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
		     vec_load_store_type vls_type, unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      gimple *def_stmt;
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
				      memory_access_type))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
	*memory_access_type = get_negative_load_store_type
	  (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
	{
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	}
      else
	*memory_access_type = VMAT_CONTIGUOUS;
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Not using elementwise accesses due to variable "
			 "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
/* Return true if boolean argument MASK is suitable for vectorizing
   conditional load or store STMT.  When returning true, store the
   type of the vectorized mask in *MASK_VECTYPE_OUT.  */

static bool
vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not an SSA name.\n");
      return false;
    }

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple *def_stmt;
  enum vect_def_type dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
			   &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vector mask type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION,
		       " does not match vector data type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
	}
      return false;
    }

  *mask_vectype_out = mask_vectype;
  return true;
}
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT.  When returning true, store the type of the
   vectorized store value in *RHS_VECTYPE_OUT and the type of the
   store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple *def_stmt;
  enum vect_def_type dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
			   &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_vectype_out = rhs_vectype;
  if (dt == vect_constant_def || dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (gimple *stmt, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
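
/* The six -1 words above are enough to fill the widest floating-point
   modes handled here; real_from_target reinterprets them so that the
   built mask has an all-ones bit pattern, which is all that the masked
   builtins inspect.  */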
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT as a gather load.  */

static tree
vect_build_zero_merge_argument (gimple *stmt, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT.  Insert new instructions
   before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
   operation.  If the load is conditional, MASK is the unvectorized
   condition, otherwise MASK is null.  */

static void
vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, gather_scatter_info *gs_info,
			      tree mask)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  gcc_checking_assert (types_compatible_p (srctype, rettype)
		       && (!mask || types_compatible_p (srctype, masktype)));

  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
					      indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
	sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask)
	{
	  for (int i = 0; i < count; ++i)
	    sel[i] = i | (count / 2);
	  indices.new_vector (sel, 2, count);
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
	}
    }
  else
    gcc_unreachable ();

  tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
					       vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;
  stmt_vec_info prev_stmt_info = NULL;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (stmt, rettype);
      mask_op = vect_build_all_ones_mask (stmt, masktype);
    }

  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      gimple *new_stmt;
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt, gsi);
      else if (j == 0)
	op = vec_oprnd0
	  = vect_get_vec_def_for_operand (gs_info->offset, stmt);
      else
	op = vec_oprnd0
	  = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt);
	      else
		{
		  gimple *def_stmt;
		  enum vect_def_type dt;
		  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
		}

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert
		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
			       TYPE_VECTOR_SUBPARTS (masktype)));
		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
						  mask_op);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mask_op = var;
		}
	    }
	  src_op = mask_op;
	}

      new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
				    mask_op, scale);

      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				TYPE_VECTOR_SUBPARTS (rettype)));
	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
	  gimple_call_set_lhs (new_stmt, op);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  var = make_ssa_name (vec_dest);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, var);
	}

      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (modifier == NARROW)
	{
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
	  new_stmt = SSA_NAME_DEF_STMT (var);
	}

      if (prev_stmt_info == NULL)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
}
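
/* As a worked illustration of the narrowing case above (hypothetical
   numbers): with COUNT == 8 the result selector is
   { 0, 1, 2, 3, 8, 9, 10, 11 }, which combines the low halves of two
   gather results, and the mask selector is { 4, 5, 6, 7, 4, 5, 6, 7 },
   which moves the upper half of the mask into place for odd copies.  */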
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */

static bool
vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    tree vectype_in, enum vect_def_type *dt)
{
  tree op, vectype;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  unsigned ncopies;
  unsigned HOST_WIDE_INT nunits, num_bytes;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return false;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
    return false;

  unsigned word_bytes = num_bytes / nunits;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
			 "\n");
      if (! PURE_SLP_STMT (stmt_info))
	{
	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
			 1, vector_stmt, stmt_info, 0, vect_prologue);
	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
			 ncopies, vec_perm, stmt_info, 0, vect_body);
	}
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  tree tem = make_ssa_name (char_vectype);
	  new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						       char_vectype, vop));
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  tree tem2 = make_ssa_name (char_vectype);
	  new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
					  tem, tem, bswap_vconst);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  tem = make_ssa_name (vectype);
	  new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						       vectype, tem2));
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
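
/* As a worked illustration: for bswap32, WORD_BYTES == 4 and the three
   stepped groups pushed above are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 };
   extended across the vector, the selector reverses the bytes within
   each 4-byte word.  */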
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt,
					&interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
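
/* For example (hypothetical types): narrowing two V4SI inputs into one
   V8HI output is a single pack operation, so it qualifies; a conversion
   that supportable_narrowing_operation can only do via intermediate
   types (MULTI_STEP_CVT != 0) is rejected.  */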
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?   */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  combined_fn cfn = gimple_call_combined_fn (stmt);
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
	  || (modifier == NARROW
	      && simple_integer_narrowing (vectype_out, vectype_in,
					   &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
					  vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
	fndecl = targetm.vectorize.builtin_vectorized_function
	  (cfn, vectype_out, vectype_in);
      else if (callee)
	fndecl = targetm.vectorize.builtin_md_vectorized_function
	  (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else if (modifier == NONE
	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
	return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
				   vectype_in, dt);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
			 "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
	add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
		       vec_promote_demote, stmt_info, 0, vect_body);

      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  if (modifier == NARROW)
		    {
		      tree half_res = make_ssa_name (vectype_in);
		      gcall *call
			= gimple_build_call_internal_vec (ifn, vargs);
		      gimple_call_set_lhs (call, half_res);
		      gimple_call_set_nothrow (call, true);
		      new_stmt = call;
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      if ((i & 1) == 0)
			{
			  prev_res = half_res;
			  continue;
			}
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, convert_code,
						      prev_res, half_res);
		    }
		  else
		    {
		      gcall *call;
		      if (ifn != IFN_LAST)
			call = gimple_build_call_internal_vec (ifn, vargs);
		      else
			call = gimple_build_call_vec (fndecl, vargs);
		      new_temp = make_ssa_name (vec_dest, call);
		      gimple_call_set_lhs (call, new_temp);
		      gimple_call_set_nothrow (call, true);
		      new_stmt = call;
		    }
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	    }

	  if (gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
	    {
	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
	      tree new_var
		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
	      gimple *init_stmt = gimple_build_assign (new_var, cst);
	      vect_init_vector_1 (stmt, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, new_var);
	    }
	  else if (modifier == NARROW)
	    {
	      tree half_res = make_ssa_name (vectype_in);
	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
	      gimple_call_set_lhs (call, half_res);
	      gimple_call_set_nothrow (call, true);
	      new_stmt = call;
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if ((j & 1) == 0)
		{
		  prev_res = half_res;
		  continue;
		}
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, convert_code,
					      prev_res, half_res);
	    }
	  else
	    {
	      gcall *call;
	      if (ifn != IFN_LAST)
		call = gimple_build_call_internal_vec (ifn, vargs);
	      else
		call = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (call, new_temp);
	      gimple_call_set_nothrow (call, true);
	      new_stmt = call;
	    }
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == (modifier == NARROW ? 1 : 0))
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }
  else if (modifier == NARROW)
    {
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  gcall *call;
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  new_stmt = call;
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
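
/* For example (hypothetical types): a call taking V4DI arguments and
   producing a V8SI result has nunits_out == 2 * nunits_in, so MODIFIER
   is NARROW; pairs of half-width results are then combined, either with
   CONVERT_CODE on the internal-function path or by passing two input
   vectors per call on the built-in path.  */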
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
    }
}
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
			       &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
	{
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step
	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
	  thisarginfo.op
	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
	       == boolean_true_node);
	  /* If loop has been peeled for alignment, we need to adjust it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	    {
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
	      thisarginfo.op
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);
	    }
	}
      else if (!vec_stmt
	       && thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && loop_vinfo
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
			     &iv, false)
	       && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && !vec_stmt
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && !slp_node
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not considering SIMD clones; not yet supported"
			 " for variable-width vectors.\n");
      return false;
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen > vf
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen < vf)
	  this_badness += (exact_log2 (vf)
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		gcc_unreachable ();
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }

  if (bestn == NULL)
    return false;

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	arginfo[i].vectype
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
								     i)));
	if (arginfo[i].vectype == NULL
	    || (simd_clone_subparts (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
	if ((bestn->simdclone->args[i].arg_type
	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
	    || (bestn->simdclone->args[i].arg_type
		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
	  {
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
								      + 1);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
		       ? size_type_node : TREE_TYPE (arginfo[i].op);
	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
	    tree sll = arginfo[i].simd_lane_linear
		       ? boolean_true_node : boolean_false_node;
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
	  }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / simd_clone_subparts (atype);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (simd_clone_subparts (atype)
		      < simd_clone_subparts (arginfo[i].vectype))
		    {
		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (simd_clone_subparts (arginfo[i].vectype)
			   / simd_clone_subparts (atype));
		      gcc_assert ((k & (k - 1)) == 0);
		      if (m == 0)
			vec_oprnd0
			  = vect_get_vec_def_for_operand (op, stmt);
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								vec_oprnd0);
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  bitsize_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      new_stmt
			= gimple_build_assign (make_ssa_name (atype),
					       vec_oprnd0);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      k = (simd_clone_subparts (atype)
			   / simd_clone_subparts (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
		      if (k != 1)
			vec_alloc (ctor_elts, k);
		      else
			ctor_elts = NULL;
		      for (l = 0; l < k; l++)
			{
			  if (m == 0 && l == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_operand (op, stmt);
			  else
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								arginfo[i].op);
			  arginfo[i].op = vec_oprnd0;
			  if (k == 1)
			    break;
			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
						  vec_oprnd0);
			}
		      if (k == 1)
			vargs.safe_push (vec_oprnd0);
		      else
			{
			  vec_oprnd0 = build_constructor (atype, ctor_elts);
			  new_stmt
			    = gimple_build_assign (make_ssa_name (atype),
						   vec_oprnd0);
			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
			  vargs.safe_push (gimple_assign_lhs (new_stmt));
			}
		    }
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
	      if (j == 0)
		{
		  gimple_seq stmts;
		  arginfo[i].op
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
					    NULL_TREE);
		  if (stmts != NULL)
		    {
		      basic_block new_bb;
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		    }
		  if (arginfo[i].simd_lane_linear)
		    {
		      vargs.safe_push (arginfo[i].op);
		      break;
		    }
		  tree phi_res = copy_ssa_name (op);
		  gphi *new_phi = create_phi_node (phi_res, loop->header);
		  set_vinfo_for_stmt (new_phi,
				      new_stmt_vec_info (new_phi, loop_vinfo));
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       ncopies * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op);
		  new_stmt
		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo));
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
			       UNKNOWN_LOCATION);
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		}
	      else
		{
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       j * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op));
		  new_stmt = gimple_build_assign (new_temp, code,
						  arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vargs.safe_push (new_temp);
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
	    default:
	      gcc_unreachable ();
	    }
	}

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
	{
	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
	  if (ratype)
	    new_temp = create_tmp_var (ratype);
	  else if (simd_clone_subparts (vectype)
		   == simd_clone_subparts (rtype))
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	  else
	    new_temp = make_ssa_name (rtype, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	}
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
	{
	  if (simd_clone_subparts (vectype) < nunits)
	    {
	      unsigned int k, l;
	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
	      k = nunits / simd_clone_subparts (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		{
		  tree t;
		  if (ratype)
		    {
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t), l * bytes));
		    }
		  else
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				bitsize_int (prec), bitsize_int (l * prec));
		  new_stmt
		    = gimple_build_assign (make_ssa_name (vectype), t);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}

	      if (ratype)
		{
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      continue;
	    }
	  else if (simd_clone_subparts (vectype) > nunits)
	    {
	      unsigned int k = (simd_clone_subparts (vectype)
				/ simd_clone_subparts (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
	      if (ratype)
		{
		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
		  for (m = 0; m < o; m++)
		    {
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
		      new_stmt
			= gimple_build_assign (make_ssa_name (rtype), tem);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		    }
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
		continue;
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	      continue;
	    }
	  else if (ratype)
	    {
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), t);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      tree clobber = build_constructor (ratype, NULL);
	      TREE_THIS_VOLATILE (clobber) = 1;
	      vect_finish_stmt_generation (stmt,
					   gimple_build_assign (new_temp,
								clobber), gsi);
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
	lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
static gimple *
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
static void
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
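
/* For example, with MULTI_STEP_CVT == 1 the function above recurses once
   and collects four defs in VEC_OPRNDS, each one (after the first) being
   a stmt copy of its predecessor.  */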
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple *stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i + 1) / 2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
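
/* As a concrete illustration of the recursion above: demoting eight
   V4SImode operands to V16QImode first packs them pairwise into four
   V8HImode vectors, truncates the operand array to that half, and then
   recurses with VEC_PACK_TRUNC_EXPR to emit the two final V16QImode
   results.  */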
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple *stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
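
/* For instance, widening V8HImode operands to V4SImode generates two half
   results per input (typically via VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR), so VEC_TMP holds twice as many defs as VEC_OPRNDS0
   on return.  */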
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
	   && !type_has_mode_precision_p (lhs_type))
	  || (INTEGRAL_TYPE_P (rhs_type)
	      && !type_has_mode_precision_p (rhs_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "can't convert between boolean and non "
			   "boolean vectors ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (multiple_p (nunits_out, nunits_in))
    modifier = NARROW;
  else
    {
      gcc_checking_assert (multiple_p (nunits_in, nunits_out));
      modifier = WIDEN;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
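
  /* For example, converting int to double with 128-bit vectors is a
     WIDEN conversion: each V4SI operand produces two V2DF results, so
     NCOPIES is derived from VECTYPE_IN, whereas a NARROW conversion
     derives it from VECTYPE_OUT instead.  */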
  bool found_mode = false;
  scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
  scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
  opt_scalar_mode rhs_mode_iter;

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      fltsz = GET_MODE_SIZE (lhs_mode);
      FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
	{
	  rhs_mode = rhs_mode_iter.require ();
	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
	    break;

	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    {
	      found_mode = true;
	      break;
	    }
	}

      if (!found_mode)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }
  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);
  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,
						      vop0);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,
						    vop0);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();

  return true;
}
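
/* An end-to-end NARROW example for the function above: truncating int to
   char with 128-bit vectors goes V4SI -> V8HI -> V16QI, i.e.
   MULTI_STEP_CVT == 1 with V8HI as the single intermediate type, and each
   final vector consumes four V4SI operands.  */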
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
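
/* A typical case accepted above: a same-width conversion such as
   "int_x = (int) unsigned_y" does not change the vector layout, so each
   copy is emitted as vec_dest = VIEW_CONVERT_EXPR<vectype>(vop).  */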
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple *slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (dt[1] == vect_internal_def)
	{
	  gimple *def = SSA_NAME_DEF_STMT (op1);
	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
	    scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }
  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt
	      && !vect_worthwhile_without_simd_p (vinfo, code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!vec_stmt
      && !VECTOR_MODE_P (TYPE_MODE (vectype))
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }
  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);
    }

  if (!target_support_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }
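
  /* A concrete case: for "z = x + y" on V4SImode, optab_for_tree_code
     returns add_optab and TARGET_SUPPORT_P is true whenever the target
     provides the addv4si3 insn pattern.  */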
  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	  if (op_type == ternary_op)
	    vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
			       slp_node);
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (vec_cvt_dest)
	    {
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					      new_temp);
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
/* A helper function to ensure data reference DR's base alignment.  */

static void
ensure_base_align (struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT.  */

static tree
get_group_alias_ptr_type (gimple *first_stmt)
{
  struct data_reference *first_dr, *next_dr;
  gimple *next_stmt;

  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
  while (next_stmt)
    {
      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
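
/* If the group mixes references with different alias sets (say, a field
   of a struct and a plain array element), returning ptr_type_node makes
   the vectorized accesses conservatively alias everything.  */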
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		    slp_tree slp_node)
{
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  enum dr_alignment_support alignment_support_scheme;
  gimple *def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  gimple *next_stmt, *first_stmt;
  bool grouped_store;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree aggr_type;
  gather_scatter_info gs_info;
  enum vect_def_type scatter_src_dt = vect_unknown_def_type;
  gimple *new_stmt;
  poly_uint64 vf;
  vec_load_store_type vls_type;
  tree ref_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (is_gimple_assign (stmt))
    {
      tree scalar_dest = gimple_assign_lhs (stmt);
      if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
	  && is_pattern_stmt_p (stmt_info))
	scalar_dest = TREE_OPERAND (scalar_dest, 0);
      if (TREE_CODE (scalar_dest) != ARRAY_REF
	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
	  && TREE_CODE (scalar_dest) != INDIRECT_REF
	  && TREE_CODE (scalar_dest) != COMPONENT_REF
	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
	  && TREE_CODE (scalar_dest) != REALPART_EXPR
	  && TREE_CODE (scalar_dest) != MEM_REF)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt);
      if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
	return false;

      if (slp_node != NULL)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "SLP of masked stores not supported.\n");
	  return false;
	}

      ref_type = TREE_TYPE (gimple_call_arg (call, 1));
      mask = gimple_call_arg (call, 2);
      if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
	return false;
    }

  op = vect_get_store_rhs (stmt);

  /* Cannot have hybrid store SLP -- that would mean storing to the
     same location twice.  */
  gcc_assert (slp == PURE_SLP_STMT (stmt_info));

  tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
    return false;

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
			    &memory_access_type, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
	{
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), false))
	    return false;
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked store.\n");
	  return false;
	}
    }
  else
    {
      /* FORNOW. In some cases can vectorize even if data-type not supported
	 (e.g. - array initialization with 0).  */
      if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
	return false;
    }

  grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
  if (grouped_store)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

      if (loop_vinfo
	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
				  memory_access_type);

      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
			       vls_type, NULL, NULL, NULL);
      return true;
    }
  gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  /* Transform.  */

  ensure_base_align (dr);
  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      poly_uint64 scatter_off_nunits
	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);

      if (known_eq (nunits, scatter_off_nunits))
	modifier = NONE;
      else if (known_eq (nunits * 2, scatter_off_nunits))
	{
	  modifier = WIDEN;

	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
	  unsigned int count = scatter_off_nunits.to_constant ();
	  vec_perm_builder sel (count, count, 1);
	  for (i = 0; i < (unsigned int) count; ++i)
	    sel.quick_push (i | (count / 2));

	  vec_perm_indices indices (sel, 1, count);
	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
						  indices);
	  gcc_assert (perm_mask != NULL_TREE);
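	  /* For COUNT == 4 the series built above is {2, 3, 2, 3}: a
	     permutation replicating the high half of the offset vector,
	     which is the half consumed by the odd-numbered scatter
	     calls.  */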
	}
      else if (known_eq (nunits, scatter_off_nunits * 2))
	{
	  modifier = NARROW;

	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
	  unsigned int count = nunits.to_constant ();
	  vec_perm_builder sel (count, count, 1);
	  for (i = 0; i < (unsigned int) count; ++i)
	    sel.quick_push (i | (count / 2));

	  vec_perm_indices indices (sel, 2, count);
	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
	  gcc_assert (perm_mask != NULL_TREE);
	  ncopies *= 2;
	}
      else
	gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);

      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
			   && TREE_CODE (rettype) == VOID_TYPE);

      ptr = fold_convert (ptrtype, gs_info.base);
      if (!is_gimple_min_invariant (ptr))
	{
	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      /* Currently we support only unconditional scatter stores,
	 so mask should be all ones.  */
      mask = build_int_cst (masktype, -1);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gs_info.scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
	{
	  if (j == 0)
	    {
	      src = vec_oprnd1
		= vect_get_vec_def_for_operand (op, stmt);
	      op = vec_oprnd0
		= vect_get_vec_def_for_operand (gs_info.offset, stmt);
	    }
	  else if (modifier != NONE && (j & 1))
	    {
	      if (modifier == WIDEN)
		{
		  src = vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
					     stmt, gsi);
		}
	      else if (modifier == NARROW)
		{
		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
					      stmt, gsi);
		  op = vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
						      vec_oprnd0);
		}
	      else
		gcc_unreachable ();
	    }
	  else
	    {
	      src = vec_oprnd1
		= vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
	      op = vec_oprnd0
		= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
						  vec_oprnd0);
	    }

	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
	    {
	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
				    TYPE_VECTOR_SUBPARTS (srctype)));
	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      src = var;
	    }

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				    TYPE_VECTOR_SUBPARTS (idxtype)));
	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      op = var;
	    }

	  new_stmt
	    = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      return true;
    }
6118 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
6121 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
6123 /* We vectorize all the stmts of the interleaving group when we
6124 reach the last stmt in the group. */
6125 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
6126 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
6135 grouped_store
= false;
6136 /* VEC_NUM is the number of vect stmts to be created for this
6138 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6139 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6140 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
6141 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6142 op
= vect_get_store_rhs (first_stmt
);
6145 /* VEC_NUM is the number of vect stmts to be created for this
6147 vec_num
= group_size
;
6149 ref_type
= get_group_alias_ptr_type (first_stmt
);
6152 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6154 if (dump_enabled_p ())
6155 dump_printf_loc (MSG_NOTE
, vect_location
,
6156 "transform store. ncopies = %d\n", ncopies
);
6158 if (memory_access_type
== VMAT_ELEMENTWISE
6159 || memory_access_type
== VMAT_STRIDED_SLP
)
6161 gimple_stmt_iterator incr_gsi
;
6167 gimple_seq stmts
= NULL
;
6168 tree stride_base
, stride_step
, alias_off
;
6171 /* Checked by get_load_store_type. */
6172 unsigned int const_nunits
= nunits
.to_constant ();
6174 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6175 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6178 = fold_build_pointer_plus
6179 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6180 size_binop (PLUS_EXPR
,
6181 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6182 convert_to_ptrofftype (DR_INIT (first_dr
))));
6183 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
6185 /* For a store with loop-invariant (but other than power-of-2)
6186 stride (i.e. not a grouped access) like so:
6188 for (i = 0; i < n; i += stride)
6191 we generate a new induction variable and new stores from
6192 the components of the (vectorized) rhs:
6194 for (j = 0; ; j += VF*stride)
6199 array[j + stride] = tmp2;
6203 unsigned nstores
= const_nunits
;
6205 tree ltype
= elem_type
;
6206 tree lvectype
= vectype
;
6209 if (group_size
< const_nunits
6210 && const_nunits
% group_size
== 0)
6212 nstores
= const_nunits
/ group_size
;
6214 ltype
= build_vector_type (elem_type
, group_size
);
6217 /* First check if vec_extract optab doesn't support extraction
6218 of vector elts directly. */
6219 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6221 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6222 || !VECTOR_MODE_P (vmode
)
6223 || (convert_optab_handler (vec_extract_optab
,
6224 TYPE_MODE (vectype
), vmode
)
6225 == CODE_FOR_nothing
))
6227 /* Try to avoid emitting an extract of vector elements
6228 by performing the extracts using an integer type of the
6229 same size, extracting from a vector of those and then
6230 re-interpreting it as the original vector type if
6233 = group_size
* GET_MODE_BITSIZE (elmode
);
6234 elmode
= int_mode_for_size (lsize
, 0).require ();
6235 unsigned int lnunits
= const_nunits
/ group_size
;
6236 /* If we can't construct such a vector fall back to
6237 element extracts from the original vector type and
6238 element size stores. */
6239 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6240 && VECTOR_MODE_P (vmode
)
6241 && (convert_optab_handler (vec_extract_optab
,
6243 != CODE_FOR_nothing
))
6247 ltype
= build_nonstandard_integer_type (lsize
, 1);
6248 lvectype
= build_vector_type (ltype
, nstores
);
6250 /* Else fall back to vector extraction anyway.
6251 Fewer stores are more important than avoiding spilling
6252 of the vector we extract from. Compared to the
6253 construction case in vectorizable_load no store-forwarding
6254 issue exists here for reasonable archs. */
6257 else if (group_size
>= const_nunits
6258 && group_size
% const_nunits
== 0)
6261 lnel
= const_nunits
;
6265 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6266 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6269 ivstep
= stride_step
;
6270 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6271 build_int_cst (TREE_TYPE (ivstep
), vf
));
6273 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6275 create_iv (stride_base
, ivstep
, NULL
,
6276 loop
, &incr_gsi
, insert_after
,
6278 incr
= gsi_stmt (incr_gsi
);
6279 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6281 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6283 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6285 prev_stmt_info
= NULL
;
6286 alias_off
= build_int_cst (ref_type
, 0);
6287 next_stmt
= first_stmt
;
6288 for (g
= 0; g
< group_size
; g
++)
6290 running_off
= offvar
;
6293 tree size
= TYPE_SIZE_UNIT (ltype
);
6294 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6296 tree newoff
= copy_ssa_name (running_off
, NULL
);
6297 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6299 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6300 running_off
= newoff
;
6302 unsigned int group_el
= 0;
6303 unsigned HOST_WIDE_INT
6304 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6305 for (j
= 0; j
< ncopies
; j
++)
6307 /* We've set op and dt above, from vect_get_store_rhs,
6308 and first_stmt == stmt. */
6313 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6315 vec_oprnd
= vec_oprnds
[0];
6319 op
= vect_get_store_rhs (next_stmt
);
6320 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6326 vec_oprnd
= vec_oprnds
[j
];
6329 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6330 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6333 /* Pun the vector to extract from if necessary. */
6334 if (lvectype
!= vectype
)
6336 tree tem
= make_ssa_name (lvectype
);
6338 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6339 lvectype
, vec_oprnd
));
6340 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6343 for (i
= 0; i
< nstores
; i
++)
6345 tree newref
, newoff
;
6346 gimple
*incr
, *assign
;
6347 tree size
= TYPE_SIZE (ltype
);
6348 /* Extract the i'th component. */
6349 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6350 bitsize_int (i
), size
);
6351 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6354 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6358 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6360 newref
= build2 (MEM_REF
, ltype
,
6361 running_off
, this_off
);
6363 /* And store it to *running_off. */
6364 assign
= gimple_build_assign (newref
, elem
);
6365 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6369 || group_el
== group_size
)
6371 newoff
= copy_ssa_name (running_off
, NULL
);
6372 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6373 running_off
, stride_step
);
6374 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6376 running_off
= newoff
;
6379 if (g
== group_size
- 1
6382 if (j
== 0 && i
== 0)
6383 STMT_VINFO_VEC_STMT (stmt_info
)
6384 = *vec_stmt
= assign
;
6386 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6387 prev_stmt_info
= vinfo_for_stmt (assign
);
6391 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6396 vec_oprnds
.release ();
6400 auto_vec
<tree
> dr_chain (group_size
);
6401 oprnds
.create (group_size
);
6403 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6404 gcc_assert (alignment_support_scheme
);
6405 bool masked_loop_p
= (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6406 /* Targets with store-lane instructions must not require explicit
6407 realignment. vect_supportable_dr_alignment always returns either
6408 dr_aligned or dr_unaligned_supported for masked operations. */
6409 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6412 || alignment_support_scheme
== dr_aligned
6413 || alignment_support_scheme
== dr_unaligned_supported
);
6415 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6416 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6417 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6419 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6420 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6422 aggr_type
= vectype
;
6425 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
6427 /* In case the vectorization factor (VF) is bigger than the number
6428 of elements that we can fit in a vectype (nunits), we have to generate
6429 more than one vector stmt - i.e - we need to "unroll" the
6430 vector stmt by a factor VF/nunits. For more details see documentation in
6431 vect_get_vec_def_for_copy_stmt. */
6433 /* In case of interleaving (non-unit grouped access):
6440 We create vectorized stores starting from base address (the access of the
6441 first stmt in the chain (S2 in the above example), when the last store stmt
6442 of the chain (S4) is reached:
6445 VS2: &base + vec_size*1 = vx0
6446 VS3: &base + vec_size*2 = vx1
6447 VS4: &base + vec_size*3 = vx3
6449 Then permutation statements are generated:
6451 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6452 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6455 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6456 (the order of the data-refs in the output of vect_permute_store_chain
6457 corresponds to the order of scalar stmts in the interleaving chain - see
6458 the documentation of vect_permute_store_chain()).
6460 In case of both multiple types and interleaving, above vector stores and
6461 permutation stmts are created for every copy. The result vector stmts are
6462 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6463 STMT_VINFO_RELATED_STMT for the next copies.
6466 prev_stmt_info
= NULL
;
6467 tree vec_mask
= NULL_TREE
;
6468 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
6469 for (j
= 0; j
< ncopies
; j
++)
6476 /* Get vectorized arguments for SLP_NODE. */
6477 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6480 vec_oprnd
= vec_oprnds
[0];
6484 /* For interleaved stores we collect vectorized defs for all the
6485 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6486 used as an input to vect_permute_store_chain(), and OPRNDS as
6487 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6489 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6490 OPRNDS are of size 1. */
6491 next_stmt
= first_stmt
;
6492 for (i
= 0; i
< group_size
; i
++)
6494 /* Since gaps are not supported for interleaved stores,
6495 GROUP_SIZE is the exact number of stmts in the chain.
6496 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6497 there is no interleaving, GROUP_SIZE is 1, and only one
6498 iteration of the loop will be executed. */
6499 op
= vect_get_store_rhs (next_stmt
);
6500 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6501 dr_chain
.quick_push (vec_oprnd
);
6502 oprnds
.quick_push (vec_oprnd
);
6503 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6506 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
6510 /* We should have catched mismatched types earlier. */
6511 gcc_assert (useless_type_conversion_p (vectype
,
6512 TREE_TYPE (vec_oprnd
)));
6513 bool simd_lane_access_p
6514 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6515 if (simd_lane_access_p
6516 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6517 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6518 && integer_zerop (DR_OFFSET (first_dr
))
6519 && integer_zerop (DR_INIT (first_dr
))
6520 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6521 get_alias_set (TREE_TYPE (ref_type
))))
6523 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6524 dataref_offset
= build_int_cst (ref_type
, 0);
6529 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6530 simd_lane_access_p
? loop
: NULL
,
6531 offset
, &dummy
, gsi
, &ptr_incr
,
6532 simd_lane_access_p
, &inv_p
);
6533 gcc_assert (bb_vinfo
|| !inv_p
);
6537 /* For interleaved stores we created vectorized defs for all the
6538 defs stored in OPRNDS in the previous iteration (previous copy).
6539 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6540 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6542 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6543 OPRNDS are of size 1. */
6544 for (i
= 0; i
< group_size
; i
++)
6547 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6548 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6549 dr_chain
[i
] = vec_oprnd
;
6550 oprnds
[i
] = vec_oprnd
;
6554 vect_is_simple_use (vec_mask
, vinfo
, &def_stmt
, &dt
);
6555 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
6559 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6560 TYPE_SIZE_UNIT (aggr_type
));
6562 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6563 TYPE_SIZE_UNIT (aggr_type
));
6566 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6570 /* Combine all the vectors into an array. */
6571 vec_array
= create_vector_array (vectype
, vec_num
);
6572 for (i
= 0; i
< vec_num
; i
++)
6574 vec_oprnd
= dr_chain
[i
];
6575 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6578 tree final_mask
= NULL
;
6580 final_mask
= vect_get_loop_mask (gsi
, masks
, ncopies
, vectype
, j
);
6582 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
6589 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6591 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
6592 tree alias_ptr
= build_int_cst (ref_type
, align
);
6593 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
6594 dataref_ptr
, alias_ptr
,
6595 final_mask
, vec_array
);
6600 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6601 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6602 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6604 gimple_call_set_lhs (call
, data_ref
);
6606 gimple_call_set_nothrow (call
, true);
6608 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6616 result_chain
.create (group_size
);
6618 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6622 next_stmt
= first_stmt
;
6623 for (i
= 0; i
< vec_num
; i
++)
6625 unsigned align
, misalign
;
6627 tree final_mask
= NULL_TREE
;
6629 final_mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6630 vectype
, vec_num
* j
+ i
);
6632 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
6636 /* Bump the vector pointer. */
6637 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6641 vec_oprnd
= vec_oprnds
[i
];
6642 else if (grouped_store
)
6643 /* For grouped stores vectorized defs are interleaved in
6644 vect_permute_store_chain(). */
6645 vec_oprnd
= result_chain
[i
];
6647 align
= DR_TARGET_ALIGNMENT (first_dr
);
6648 if (aligned_access_p (first_dr
))
6650 else if (DR_MISALIGNMENT (first_dr
) == -1)
6652 align
= dr_alignment (vect_dr_behavior (first_dr
));
6656 misalign
= DR_MISALIGNMENT (first_dr
);
6657 if (dataref_offset
== NULL_TREE
6658 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6659 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6662 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6664 tree perm_mask
= perm_mask_for_reverse (vectype
);
6666 = vect_create_destination_var (vect_get_store_rhs (stmt
),
6668 tree new_temp
= make_ssa_name (perm_dest
);
6670 /* Generate the permute statement. */
6672 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6673 vec_oprnd
, perm_mask
);
6674 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6676 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6677 vec_oprnd
= new_temp
;
6680 /* Arguments are ready. Create the new vector stmt. */
6683 align
= least_bit_hwi (misalign
| align
);
6684 tree ptr
= build_int_cst (ref_type
, align
);
6686 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
6688 final_mask
, vec_oprnd
);
6689 gimple_call_set_nothrow (call
, true);
6694 data_ref
= fold_build2 (MEM_REF
, vectype
,
6698 : build_int_cst (ref_type
, 0));
6699 if (aligned_access_p (first_dr
))
6701 else if (DR_MISALIGNMENT (first_dr
) == -1)
6702 TREE_TYPE (data_ref
)
6703 = build_aligned_type (TREE_TYPE (data_ref
),
6704 align
* BITS_PER_UNIT
);
6706 TREE_TYPE (data_ref
)
6707 = build_aligned_type (TREE_TYPE (data_ref
),
6708 TYPE_ALIGN (elem_type
));
6709 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6711 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6716 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6724 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6726 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6727 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6732 result_chain
.release ();
6733 vec_oprnds
.release ();
6738 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6739 VECTOR_CST mask. No checks are made that the target platform supports the
6740 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6741 vect_gen_perm_mask_checked. */
6744 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
6748 poly_uint64 nunits
= sel
.length ();
6749 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
6751 mask_type
= build_vector_type (ssizetype
, nunits
);
6752 return vec_perm_indices_to_tree (mask_type
, sel
);
6755 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6756 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6759 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
6761 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
6762 return vect_gen_perm_mask_any (vectype
, sel
);
6765 /* Given a vector variable X and Y, that was generated for the scalar
6766 STMT, generate instructions to permute the vector elements of X and Y
6767 using permutation mask MASK_VEC, insert them at *GSI and return the
6768 permuted vector variable. */
6771 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6772 gimple_stmt_iterator
*gsi
)
6774 tree vectype
= TREE_TYPE (x
);
6775 tree perm_dest
, data_ref
;
6778 tree scalar_dest
= gimple_get_lhs (stmt
);
6779 if (TREE_CODE (scalar_dest
) == SSA_NAME
)
6780 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6782 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
6783 data_ref
= make_ssa_name (perm_dest
);
6785 /* Generate the permute statement. */
6786 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6787 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6792 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6793 inserting them on the loops preheader edge. Returns true if we
6794 were successful in doing so (and thus STMT can be moved then),
6795 otherwise returns false. */
6798 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6804 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6806 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6807 if (!gimple_nop_p (def_stmt
)
6808 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6810 /* Make sure we don't need to recurse. While we could do
6811 so in simple cases when there are more complex use webs
6812 we don't have an easy way to preserve stmt order to fulfil
6813 dependencies within them. */
6816 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6818 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6820 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6821 if (!gimple_nop_p (def_stmt2
)
6822 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6832 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6834 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6835 if (!gimple_nop_p (def_stmt
)
6836 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6838 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6839 gsi_remove (&gsi
, false);
6840 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6847 /* vectorizable_load.
6849 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6851 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6852 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6853 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6856 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6857 slp_tree slp_node
, slp_instance slp_node_instance
)
6860 tree vec_dest
= NULL
;
6861 tree data_ref
= NULL
;
6862 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6863 stmt_vec_info prev_stmt_info
;
6864 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6865 struct loop
*loop
= NULL
;
6866 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6867 bool nested_in_vect_loop
= false;
6868 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6872 gimple
*new_stmt
= NULL
;
6874 enum dr_alignment_support alignment_support_scheme
;
6875 tree dataref_ptr
= NULL_TREE
;
6876 tree dataref_offset
= NULL_TREE
;
6877 gimple
*ptr_incr
= NULL
;
6880 unsigned int group_size
;
6881 poly_uint64 group_gap_adj
;
6882 tree msq
= NULL_TREE
, lsq
;
6883 tree offset
= NULL_TREE
;
6884 tree byte_offset
= NULL_TREE
;
6885 tree realignment_token
= NULL_TREE
;
6887 vec
<tree
> dr_chain
= vNULL
;
6888 bool grouped_load
= false;
6890 gimple
*first_stmt_for_drptr
= NULL
;
6892 bool compute_in_loop
= false;
6893 struct loop
*at_loop
;
6895 bool slp
= (slp_node
!= NULL
);
6896 bool slp_perm
= false;
6897 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6900 gather_scatter_info gs_info
;
6901 vec_info
*vinfo
= stmt_info
->vinfo
;
6904 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6907 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6911 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6912 if (is_gimple_assign (stmt
))
6914 scalar_dest
= gimple_assign_lhs (stmt
);
6915 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6918 tree_code code
= gimple_assign_rhs_code (stmt
);
6919 if (code
!= ARRAY_REF
6920 && code
!= BIT_FIELD_REF
6921 && code
!= INDIRECT_REF
6922 && code
!= COMPONENT_REF
6923 && code
!= IMAGPART_EXPR
6924 && code
!= REALPART_EXPR
6926 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6931 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
6932 if (!call
|| !gimple_call_internal_p (call
, IFN_MASK_LOAD
))
6935 scalar_dest
= gimple_call_lhs (call
);
6939 if (slp_node
!= NULL
)
6941 if (dump_enabled_p ())
6942 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6943 "SLP of masked loads not supported.\n");
6947 mask
= gimple_call_arg (call
, 2);
6948 if (!vect_check_load_store_mask (stmt
, mask
, &mask_vectype
))
6952 if (!STMT_VINFO_DATA_REF (stmt_info
))
6955 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6956 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6960 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6961 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6962 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6967 /* Multiple types in SLP are handled by creating the appropriate number of
6968 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6973 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6975 gcc_assert (ncopies
>= 1);
6977 /* FORNOW. This restriction should be relaxed. */
6978 if (nested_in_vect_loop
&& ncopies
> 1)
6980 if (dump_enabled_p ())
6981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6982 "multiple types in nested loop.\n");
6986 /* Invalidate assumptions made by dependence analysis when vectorization
6987 on the unrolled body effectively re-orders stmts. */
6989 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6990 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
6991 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6993 if (dump_enabled_p ())
6994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6995 "cannot perform implicit CSE when unrolling "
6996 "with negative dependence distance\n");
7000 elem_type
= TREE_TYPE (vectype
);
7001 mode
= TYPE_MODE (vectype
);
7003 /* FORNOW. In some cases can vectorize even if data-type not supported
7004 (e.g. - data copies). */
7005 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7007 if (dump_enabled_p ())
7008 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7009 "Aligned load, but unsupported type.\n");
7013 /* Check if the load is a part of an interleaving chain. */
7014 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7016 grouped_load
= true;
7018 gcc_assert (!nested_in_vect_loop
);
7019 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7021 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7022 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7024 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7027 /* Invalidate assumptions made by dependence analysis when vectorization
7028 on the unrolled body effectively re-orders stmts. */
7029 if (!PURE_SLP_STMT (stmt_info
)
7030 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7031 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7032 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7034 if (dump_enabled_p ())
7035 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7036 "cannot perform implicit CSE when performing "
7037 "group loads with negative dependence distance\n");
7041 /* Similarly when the stmt is a load that is both part of a SLP
7042 instance and a loop vectorized stmt via the same-dr mechanism
7043 we have to give up. */
7044 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
7045 && (STMT_SLP_TYPE (stmt_info
)
7046 != STMT_SLP_TYPE (vinfo_for_stmt
7047 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
7049 if (dump_enabled_p ())
7050 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7051 "conflicting SLP types for CSEd load\n");
7058 vect_memory_access_type memory_access_type
;
7059 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7060 &memory_access_type
, &gs_info
))
7065 if (memory_access_type
== VMAT_CONTIGUOUS
)
7067 machine_mode vec_mode
= TYPE_MODE (vectype
);
7068 if (!VECTOR_MODE_P (vec_mode
)
7069 || !can_vec_mask_load_store_p (vec_mode
,
7070 TYPE_MODE (mask_vectype
), true))
7073 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7075 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7077 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7078 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7080 if (dump_enabled_p ())
7081 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7082 "masked gather with integer mask not"
7087 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7089 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7091 "unsupported access type for masked load.\n");
7096 if (!vec_stmt
) /* transformation not required. */
7099 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7102 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7103 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7104 memory_access_type
);
7106 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7107 /* The SLP costs are calculated during SLP analysis. */
7108 if (!PURE_SLP_STMT (stmt_info
))
7109 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7115 gcc_assert (memory_access_type
7116 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7118 if (dump_enabled_p ())
7119 dump_printf_loc (MSG_NOTE
, vect_location
,
7120 "transform load. ncopies = %d\n", ncopies
);
7124 ensure_base_align (dr
);
7126 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7128 vect_build_gather_load_calls (stmt
, gsi
, vec_stmt
, &gs_info
, mask
);
7132 if (memory_access_type
== VMAT_ELEMENTWISE
7133 || memory_access_type
== VMAT_STRIDED_SLP
)
7135 gimple_stmt_iterator incr_gsi
;
7141 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7142 gimple_seq stmts
= NULL
;
7143 tree stride_base
, stride_step
, alias_off
;
7144 /* Checked by get_load_store_type. */
7145 unsigned int const_nunits
= nunits
.to_constant ();
7147 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7148 gcc_assert (!nested_in_vect_loop
);
7150 if (slp
&& grouped_load
)
7152 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7153 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7154 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7155 ref_type
= get_group_alias_ptr_type (first_stmt
);
7162 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7166 = fold_build_pointer_plus
7167 (DR_BASE_ADDRESS (first_dr
),
7168 size_binop (PLUS_EXPR
,
7169 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7170 convert_to_ptrofftype (DR_INIT (first_dr
))));
7171 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7173 /* For a load with loop-invariant (but other than power-of-2)
7174 stride (i.e. not a grouped access) like so:
7176 for (i = 0; i < n; i += stride)
7179 we generate a new induction variable and new accesses to
7180 form a new vector (or vectors, depending on ncopies):
7182 for (j = 0; ; j += VF*stride)
7184 tmp2 = array[j + stride];
7186 vectemp = {tmp1, tmp2, ...}
7189 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7190 build_int_cst (TREE_TYPE (stride_step
), vf
));
7192 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7194 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7195 loop
, &incr_gsi
, insert_after
,
7197 incr
= gsi_stmt (incr_gsi
);
7198 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7200 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7201 &stmts
, true, NULL_TREE
);
7203 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7205 prev_stmt_info
= NULL
;
7206 running_off
= offvar
;
7207 alias_off
= build_int_cst (ref_type
, 0);
7208 int nloads
= const_nunits
;
7210 tree ltype
= TREE_TYPE (vectype
);
7211 tree lvectype
= vectype
;
7212 auto_vec
<tree
> dr_chain
;
7213 if (memory_access_type
== VMAT_STRIDED_SLP
)
7215 if (group_size
< const_nunits
)
7217 /* First check if vec_init optab supports construction from
7218 vector elts directly. */
7219 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7221 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7222 && VECTOR_MODE_P (vmode
)
7223 && (convert_optab_handler (vec_init_optab
,
7224 TYPE_MODE (vectype
), vmode
)
7225 != CODE_FOR_nothing
))
7227 nloads
= const_nunits
/ group_size
;
7229 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7233 /* Otherwise avoid emitting a constructor of vector elements
7234 by performing the loads using an integer type of the same
7235 size, constructing a vector of those and then
7236 re-interpreting it as the original vector type.
7237 This avoids a huge runtime penalty due to the general
7238 inability to perform store forwarding from smaller stores
7239 to a larger load. */
7241 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7242 elmode
= int_mode_for_size (lsize
, 0).require ();
7243 unsigned int lnunits
= const_nunits
/ group_size
;
7244 /* If we can't construct such a vector fall back to
7245 element loads of the original vector type. */
7246 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7247 && VECTOR_MODE_P (vmode
)
7248 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7249 != CODE_FOR_nothing
))
7253 ltype
= build_nonstandard_integer_type (lsize
, 1);
7254 lvectype
= build_vector_type (ltype
, nloads
);
7261 lnel
= const_nunits
;
7264 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7268 /* For SLP permutation support we need to load the whole group,
7269 not only the number of vector stmts the permutation result
7273 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7275 unsigned int const_vf
= vf
.to_constant ();
7276 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7277 dr_chain
.create (ncopies
);
7280 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7282 unsigned int group_el
= 0;
7283 unsigned HOST_WIDE_INT
7284 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7285 for (j
= 0; j
< ncopies
; j
++)
7288 vec_alloc (v
, nloads
);
7289 for (i
= 0; i
< nloads
; i
++)
7291 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7293 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7294 build2 (MEM_REF
, ltype
,
7295 running_off
, this_off
));
7296 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7298 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7299 gimple_assign_lhs (new_stmt
));
7303 || group_el
== group_size
)
7305 tree newoff
= copy_ssa_name (running_off
);
7306 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7307 running_off
, stride_step
);
7308 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7310 running_off
= newoff
;
7316 tree vec_inv
= build_constructor (lvectype
, v
);
7317 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7318 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7319 if (lvectype
!= vectype
)
7321 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7323 build1 (VIEW_CONVERT_EXPR
,
7324 vectype
, new_temp
));
7325 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7332 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7334 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7339 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7341 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7342 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7348 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7349 slp_node_instance
, false, &n_perms
);
7356 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7357 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7358 /* For SLP vectorization we directly vectorize a subchain
7359 without permutation. */
7360 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7361 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7362 /* For BB vectorization always use the first stmt to base
7363 the data ref pointer on. */
7365 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7367 /* Check if the chain of loads is already vectorized. */
7368 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7369 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7370 ??? But we can only do so if there is exactly one
7371 as we have no way to get at the rest. Leave the CSE
7373 ??? With the group load eventually participating
7374 in multiple different permutations (having multiple
7375 slp nodes which refer to the same group) the CSE
7376 is even wrong code. See PR56270. */
7379 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7382 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7385 /* VEC_NUM is the number of vect stmts to be created for this group. */
7388 grouped_load
= false;
7389 /* For SLP permutation support we need to load the whole group,
7390 not only the number of vector stmts the permutation result
7394 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7396 unsigned int const_vf
= vf
.to_constant ();
7397 unsigned int const_nunits
= nunits
.to_constant ();
7398 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7399 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7403 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7405 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7409 vec_num
= group_size
;
7411 ref_type
= get_group_alias_ptr_type (first_stmt
);
7417 group_size
= vec_num
= 1;
7419 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7422 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7423 gcc_assert (alignment_support_scheme
);
7424 bool masked_loop_p
= (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7425 /* Targets with store-lane instructions must not require explicit
7426 realignment. vect_supportable_dr_alignment always returns either
7427 dr_aligned or dr_unaligned_supported for masked operations. */
7428 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7431 || alignment_support_scheme
== dr_aligned
7432 || alignment_support_scheme
== dr_unaligned_supported
);
7434 /* In case the vectorization factor (VF) is bigger than the number
7435 of elements that we can fit in a vectype (nunits), we have to generate
7436 more than one vector stmt - i.e - we need to "unroll" the
7437 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7438 from one copy of the vector stmt to the next, in the field
7439 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7440 stages to find the correct vector defs to be used when vectorizing
7441 stmts that use the defs of the current stmt. The example below
7442 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7443 need to create 4 vectorized stmts):
7445 before vectorization:
7446 RELATED_STMT VEC_STMT
7450 step 1: vectorize stmt S1:
7451 We first create the vector stmt VS1_0, and, as usual, record a
7452 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7453 Next, we create the vector stmt VS1_1, and record a pointer to
7454 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7455 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7457 RELATED_STMT VEC_STMT
7458 VS1_0: vx0 = memref0 VS1_1 -
7459 VS1_1: vx1 = memref1 VS1_2 -
7460 VS1_2: vx2 = memref2 VS1_3 -
7461 VS1_3: vx3 = memref3 - -
7462 S1: x = load - VS1_0
7465 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7466 information we recorded in RELATED_STMT field is used to vectorize
7469 /* In case of interleaving (non-unit grouped access):
7476 Vectorized loads are created in the order of memory accesses
7477 starting from the access of the first stmt of the chain:
7480 VS2: vx1 = &base + vec_size*1
7481 VS3: vx3 = &base + vec_size*2
7482 VS4: vx4 = &base + vec_size*3
7484 Then permutation statements are generated:
7486 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7487 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7490 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7491 (the order of the data-refs in the output of vect_permute_load_chain
7492 corresponds to the order of scalar stmts in the interleaving chain - see
7493 the documentation of vect_permute_load_chain()).
7494 The generation of permutation stmts and recording them in
7495 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7497 In case of both multiple types and interleaving, the vector loads and
7498 permutation stmts above are created for every copy. The result vector
7499 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7500 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7502 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7503 on a target that supports unaligned accesses (dr_unaligned_supported)
7504 we generate the following code:
7508 p = p + indx * vectype_size;
7513 Otherwise, the data reference is potentially unaligned on a target that
7514 does not support unaligned accesses (dr_explicit_realign_optimized) -
7515 then generate the following code, in which the data in each iteration is
7516 obtained by two vector loads, one from the previous iteration, and one
7517 from the current iteration:
7519 msq_init = *(floor(p1))
7520 p2 = initial_addr + VS - 1;
7521 realignment_token = call target_builtin;
7524 p2 = p2 + indx * vectype_size
7526 vec_dest = realign_load (msq, lsq, realignment_token)
7531 /* If the misalignment remains the same throughout the execution of the
7532 loop, we can create the init_addr and permutation mask at the loop
7533 preheader. Otherwise, it needs to be created inside the loop.
7534 This can only occur when vectorizing memory accesses in the inner-loop
7535 nested within an outer-loop that is being vectorized. */
7537 if (nested_in_vect_loop
7538 && !multiple_p (DR_STEP_ALIGNMENT (dr
),
7539 GET_MODE_SIZE (TYPE_MODE (vectype
))))
7541 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7542 compute_in_loop
= true;
7545 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7546 || alignment_support_scheme
== dr_explicit_realign
)
7547 && !compute_in_loop
)
7549 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7550 alignment_support_scheme
, NULL_TREE
,
7552 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7554 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7555 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7562 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7563 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7565 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7566 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7568 aggr_type
= vectype
;
7570 tree vec_mask
= NULL_TREE
;
7571 prev_stmt_info
= NULL
;
7572 poly_uint64 group_elt
= 0;
7573 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
7574 for (j
= 0; j
< ncopies
; j
++)
7576 /* 1. Create the vector or array pointer update chain. */
7579 bool simd_lane_access_p
7580 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7581 if (simd_lane_access_p
7582 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7583 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7584 && integer_zerop (DR_OFFSET (first_dr
))
7585 && integer_zerop (DR_INIT (first_dr
))
7586 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7587 get_alias_set (TREE_TYPE (ref_type
)))
7588 && (alignment_support_scheme
== dr_aligned
7589 || alignment_support_scheme
== dr_unaligned_supported
))
7591 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7592 dataref_offset
= build_int_cst (ref_type
, 0);
7595 else if (first_stmt_for_drptr
7596 && first_stmt
!= first_stmt_for_drptr
)
7599 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7600 at_loop
, offset
, &dummy
, gsi
,
7601 &ptr_incr
, simd_lane_access_p
,
7602 &inv_p
, byte_offset
);
7603 /* Adjust the pointer by the difference to first_stmt. */
7604 data_reference_p ptrdr
7605 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7606 tree diff
= fold_convert (sizetype
,
7607 size_binop (MINUS_EXPR
,
7610 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7615 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7616 offset
, &dummy
, gsi
, &ptr_incr
,
7617 simd_lane_access_p
, &inv_p
,
7620 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
7626 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7627 TYPE_SIZE_UNIT (aggr_type
));
7629 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7630 TYPE_SIZE_UNIT (aggr_type
));
7635 vect_is_simple_use (vec_mask
, vinfo
, &def_stmt
, &dt
);
7636 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
7640 if (grouped_load
|| slp_perm
)
7641 dr_chain
.create (vec_num
);
7643 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7647 vec_array
= create_vector_array (vectype
, vec_num
);
7649 tree final_mask
= NULL_TREE
;
7651 final_mask
= vect_get_loop_mask (gsi
, masks
, ncopies
, vectype
, j
);
7653 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7660 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7662 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7663 tree alias_ptr
= build_int_cst (ref_type
, align
);
7664 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
7665 dataref_ptr
, alias_ptr
,
7671 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7672 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7673 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7675 gimple_call_set_lhs (call
, vec_array
);
7676 gimple_call_set_nothrow (call
, true);
7678 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7680 /* Extract each vector into an SSA_NAME. */
7681 for (i
= 0; i
< vec_num
; i
++)
7683 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7685 dr_chain
.quick_push (new_temp
);
7688 /* Record the mapping between SSA_NAMEs and statements. */
7689 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7693 for (i
= 0; i
< vec_num
; i
++)
7695 tree final_mask
= NULL_TREE
;
7697 && memory_access_type
!= VMAT_INVARIANT
)
7698 final_mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
7699 vectype
, vec_num
* j
+ i
);
7701 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7705 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7708 /* 2. Create the vector-load in the loop. */
7709 switch (alignment_support_scheme
)
7712 case dr_unaligned_supported
:
7714 unsigned int align
, misalign
;
7716 align
= DR_TARGET_ALIGNMENT (dr
);
7717 if (alignment_support_scheme
== dr_aligned
)
7719 gcc_assert (aligned_access_p (first_dr
));
7722 else if (DR_MISALIGNMENT (first_dr
) == -1)
7724 align
= dr_alignment (vect_dr_behavior (first_dr
));
7728 misalign
= DR_MISALIGNMENT (first_dr
);
7729 if (dataref_offset
== NULL_TREE
7730 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7731 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7736 align
= least_bit_hwi (misalign
| align
);
7737 tree ptr
= build_int_cst (ref_type
, align
);
7739 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
7742 gimple_call_set_nothrow (call
, true);
7744 data_ref
= NULL_TREE
;
7749 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7752 : build_int_cst (ref_type
, 0));
7753 if (alignment_support_scheme
== dr_aligned
)
7755 else if (DR_MISALIGNMENT (first_dr
) == -1)
7756 TREE_TYPE (data_ref
)
7757 = build_aligned_type (TREE_TYPE (data_ref
),
7758 align
* BITS_PER_UNIT
);
7760 TREE_TYPE (data_ref
)
7761 = build_aligned_type (TREE_TYPE (data_ref
),
7762 TYPE_ALIGN (elem_type
));
7766 case dr_explicit_realign
:
7770 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7772 if (compute_in_loop
)
7773 msq
= vect_setup_realignment (first_stmt
, gsi
,
7775 dr_explicit_realign
,
7778 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7779 ptr
= copy_ssa_name (dataref_ptr
);
7781 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7782 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7783 new_stmt
= gimple_build_assign
7784 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7786 (TREE_TYPE (dataref_ptr
),
7787 -(HOST_WIDE_INT
) align
));
7788 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7790 = build2 (MEM_REF
, vectype
, ptr
,
7791 build_int_cst (ref_type
, 0));
7792 vec_dest
= vect_create_destination_var (scalar_dest
,
7794 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7795 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7796 gimple_assign_set_lhs (new_stmt
, new_temp
);
7797 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7798 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7799 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7802 bump
= size_binop (MULT_EXPR
, vs
,
7803 TYPE_SIZE_UNIT (elem_type
));
7804 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7805 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7806 new_stmt
= gimple_build_assign
7807 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7809 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7810 ptr
= copy_ssa_name (ptr
, new_stmt
);
7811 gimple_assign_set_lhs (new_stmt
, ptr
);
7812 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7814 = build2 (MEM_REF
, vectype
, ptr
,
7815 build_int_cst (ref_type
, 0));
7818 case dr_explicit_realign_optimized
:
7820 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7821 new_temp
= copy_ssa_name (dataref_ptr
);
7823 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7824 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7825 new_stmt
= gimple_build_assign
7826 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7827 build_int_cst (TREE_TYPE (dataref_ptr
),
7828 -(HOST_WIDE_INT
) align
));
7829 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7831 = build2 (MEM_REF
, vectype
, new_temp
,
7832 build_int_cst (ref_type
, 0));
7838 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7839 /* DATA_REF is null if we've already built the statement. */
7841 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7842 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7843 gimple_set_lhs (new_stmt
, new_temp
);
7844 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7846 /* 3. Handle explicit realignment if necessary/supported.
7848 vec_dest = realign_load (msq, lsq, realignment_token) */
7849 if (alignment_support_scheme
== dr_explicit_realign_optimized
7850 || alignment_support_scheme
== dr_explicit_realign
)
7852 lsq
= gimple_assign_lhs (new_stmt
);
7853 if (!realignment_token
)
7854 realignment_token
= dataref_ptr
;
7855 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7856 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7857 msq
, lsq
, realignment_token
);
7858 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7859 gimple_assign_set_lhs (new_stmt
, new_temp
);
7860 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7862 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7865 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7866 add_phi_arg (phi
, lsq
,
7867 loop_latch_edge (containing_loop
),
7873 /* 4. Handle invariant-load. */
7874 if (inv_p
&& !bb_vinfo
)
7876 gcc_assert (!grouped_load
);
7877 /* If we have versioned for aliasing or the loop doesn't
7878 have any data dependencies that would preclude this,
7879 then we are sure this is a loop invariant load and
7880 thus we can insert it on the preheader edge. */
7881 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7882 && !nested_in_vect_loop
7883 && hoist_defs_of_uses (stmt
, loop
))
7885 if (dump_enabled_p ())
7887 dump_printf_loc (MSG_NOTE
, vect_location
,
7888 "hoisting out of the vectorized "
7890 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7892 tree tem
= copy_ssa_name (scalar_dest
);
7893 gsi_insert_on_edge_immediate
7894 (loop_preheader_edge (loop
),
7895 gimple_build_assign (tem
,
7897 (gimple_assign_rhs1 (stmt
))));
7898 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7899 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7900 set_vinfo_for_stmt (new_stmt
,
7901 new_stmt_vec_info (new_stmt
, vinfo
));
7905 gimple_stmt_iterator gsi2
= *gsi
;
7907 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7909 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7913 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7915 tree perm_mask
= perm_mask_for_reverse (vectype
);
7916 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7917 perm_mask
, stmt
, gsi
);
7918 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7921 /* Collect vector loads and later create their permutation in
7922 vect_transform_grouped_load (). */
7923 if (grouped_load
|| slp_perm
)
7924 dr_chain
.quick_push (new_temp
);
7926 /* Store vector loads in the corresponding SLP_NODE. */
7927 if (slp
&& !slp_perm
)
7928 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7930 /* With SLP permutation we load the gaps as well, without
7931 we need to skip the gaps after we manage to fully load
7932 all elements. group_gap_adj is GROUP_SIZE here. */
7933 group_elt
+= nunits
;
7934 if (maybe_ne (group_gap_adj
, 0U)
7936 && known_eq (group_elt
, group_size
- group_gap_adj
))
7938 poly_wide_int bump_val
7939 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7941 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7942 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7947 /* Bump the vector pointer to account for a gap or for excess
7948 elements loaded for a permuted SLP load. */
7949 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
7951 poly_wide_int bump_val
7952 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7954 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7955 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7960 if (slp
&& !slp_perm
)
7966 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7967 slp_node_instance
, false,
7970 dr_chain
.release ();
7978 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7979 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7980 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7985 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7987 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7988 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7991 dr_chain
.release ();
7997 /* Function vect_is_simple_cond.
8000 LOOP - the loop that is being vectorized.
8001 COND - Condition that is checked for simple use.
8004 *COMP_VECTYPE - the vector type for the comparison.
8005 *DTS - The def types for the arguments of the comparison
8007 Returns whether a COND can be vectorized. Checks whether
8008 condition operands are supportable using vec_is_simple_use. */
8011 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8012 tree
*comp_vectype
, enum vect_def_type
*dts
,
8016 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8019 if (TREE_CODE (cond
) == SSA_NAME
8020 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8022 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
8023 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
8024 &dts
[0], comp_vectype
)
8026 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8031 if (!COMPARISON_CLASS_P (cond
))
8034 lhs
= TREE_OPERAND (cond
, 0);
8035 rhs
= TREE_OPERAND (cond
, 1);
8037 if (TREE_CODE (lhs
) == SSA_NAME
)
8039 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
8040 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
8043 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8044 || TREE_CODE (lhs
) == FIXED_CST
)
8045 dts
[0] = vect_constant_def
;
8049 if (TREE_CODE (rhs
) == SSA_NAME
)
8051 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
8052 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
8055 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8056 || TREE_CODE (rhs
) == FIXED_CST
)
8057 dts
[1] = vect_constant_def
;
8061 if (vectype1
&& vectype2
8062 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8063 TYPE_VECTOR_SUBPARTS (vectype2
)))
8066 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8067 /* Invariant comparison. */
8068 if (! *comp_vectype
)
8070 tree scalar_type
= TREE_TYPE (lhs
);
8071 /* If we can widen the comparison to match vectype do so. */
8072 if (INTEGRAL_TYPE_P (scalar_type
)
8073 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8074 TYPE_SIZE (TREE_TYPE (vectype
))))
8075 scalar_type
= build_nonstandard_integer_type
8076 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8077 TYPE_UNSIGNED (scalar_type
));
8078 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  vect_reduction_type reduction_type
    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
  if (reduction_type == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
	return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	       && reduc_def))
	return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "value used after loop.\n");
	  return false;
	}
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
			    &comp_vectype, &dts[0], vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
			   &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
			   &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  cond_code = TREE_CODE (cond_expr);
  if (!masked)
    {
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);
    }

  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
    {
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
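      /* For example, with mask operands the condition "a > b" becomes
	 "a & ~b" (bitop1 = BIT_NOT_EXPR applied to b, bitop2 =
	 BIT_AND_EXPR), and "a <= b" becomes "b | ~a" after the operand
	 swap below.  For "a == b" the mask "a ^ b" is computed
	 instead and, rather than negating it, the then and else
	 clauses of the VEC_COND_EXPR are simply swapped later.  */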
      switch (cond_code)
	{
	case GT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  break;
	case GE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  break;
	case LT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case LE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case NE_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  break;
	case EQ_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
	  break;
	default:
	  return false;
	}
      cond_code = SSA_NAME;
    }

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      if (bitop1 != NOP_EXPR)
	{
	  machine_mode mode = TYPE_MODE (comp_vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, comp_vectype,
					   optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}
      if (expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
	{
	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
	  return true;
	}
      return false;
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  if (reduction_type != EXTRACT_LAST_REDUCTION)
    vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      if (masked)
		ops.safe_push (cond_expr);
	      else
		{
		  ops.safe_push (cond_expr0);
		  ops.safe_push (cond_expr1);
		}
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      if (!masked)
		vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      gimple *gtemp;
	      if (masked)
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr, stmt,
						    comp_vectype);
		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
				      &gtemp, &dts[0]);
		}
	      else
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr0,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);

		  vec_cond_rhs
		    = vect_get_vec_def_for_operand (cond_expr1,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
		}
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause
		    = vect_get_vec_def_for_operand (then_clause, stmt);
		  vect_is_simple_use (then_clause, loop_vinfo,
				      &gtemp, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause
		    = vect_get_vec_def_for_operand (else_clause, stmt);
		  vect_is_simple_use (else_clause, loop_vinfo,
				      &gtemp, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs
	    = vect_get_vec_def_for_stmt_copy (dts[0],
					      vec_oprnds0.pop ());
	  if (!masked)
	    vec_cond_rhs
	      = vect_get_vec_def_for_stmt_copy (dts[1],
						vec_oprnds1.pop ());

	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  if (!masked)
	    vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  if (masked)
	    vec_compare = vec_cond_lhs;
	  else
	    {
	      vec_cond_rhs = vec_oprnds1[i];
	      if (bitop1 == NOP_EXPR)
		vec_compare = build2 (cond_code, vec_cmp_type,
				      vec_cond_lhs, vec_cond_rhs);
	      else
		{
		  new_temp = make_ssa_name (vec_cmp_type);
		  if (bitop1 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (new_temp, bitop1,
						    vec_cond_rhs);
		  else
		    new_stmt
		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
					     vec_cond_rhs);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (bitop2 == NOP_EXPR)
		    vec_compare = new_temp;
		  else if (bitop2 == BIT_NOT_EXPR)
		    {
		      /* Instead of doing ~x ? y : z do x ? z : y.  */
		      vec_compare = new_temp;
		      std::swap (vec_then_clause, vec_else_clause);
		    }
		  else
		    {
		      vec_compare = make_ssa_name (vec_cmp_type);
		      new_stmt
			= gimple_build_assign (vec_compare, bitop2,
					       vec_cond_lhs, new_temp);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    }
		}
	    }
	  if (reduction_type == EXTRACT_LAST_REDUCTION)
	    {
	      if (!is_gimple_val (vec_compare))
		{
		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
		  new_stmt = gimple_build_assign (vec_compare_name,
						  vec_compare);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vec_compare = vec_compare_name;
		}
	      gcc_assert (reduc_index == 2);
	      new_stmt = gimple_build_call_internal
		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
		 vec_then_clause);
	      gimple_call_set_lhs (new_stmt, scalar_dest);
	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
	      if (stmt == gsi_stmt (*gsi))
		vect_finish_replace_stmt (stmt, new_stmt);
	      else
		{
		  /* In this case we're moving the definition to later in the
		     block.  That doesn't matter because the only uses of the
		     lhs are in phi statements.  */
		  gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
		  gsi_remove (&old_gsi, true);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	    }
	  else
	    {
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
					      vec_compare, vec_then_clause,
					      vec_else_clause);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
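/* As a concrete illustration, with a 4-lane vector type the scalar
   statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is transformed into

       mask_6 = a_v < b_v;                      (type vec_cmp_type)
       x_v = VEC_COND_EXPR <mask_6, c_v, d_v>;  (type vectype)

   where a_v, b_v, c_v and d_v are the vector defs obtained above for
   the scalar operands.  */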
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, tree reduc_def,
			 slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  poly_uint64 nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
			   &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
			   &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
	return false;
    }
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
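  /* The resulting lowering for mask operands is, lane by lane:

       a >  b   ->   a & ~b
       a >= b   ->   a | ~b
       a <  b   ->   b & ~a
       a <= b   ->   b | ~a
       a != b   ->   a ^ b
       a == b   ->   ~(a ^ b)

     where the LT/LE forms are obtained from GT/GE by the operand
     swaps below.  */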
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, NULL, NULL);
      if (bitop1 == NOP_EXPR)
	return expand_vec_cmp_expr_p (vectype, mask_type, code);
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	  return true;
	}
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 2> ops;
	      auto_vec<vec<tree>, 2> vec_defs;

	      ops.safe_push (rhs1);
	      ops.safe_push (rhs2);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
	    }
	}
      else
	{
	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
						     vec_oprnds0.pop ());
	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
						     vec_oprnds1.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_rhs1);
	  vec_oprnds1.quick_push (vec_rhs2);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
	{
	  vec_rhs2 = vec_oprnds1[i];

	  new_temp = make_ssa_name (mask);
	  if (bitop1 == NOP_EXPR)
	    {
	      new_stmt = gimple_build_assign (new_temp, code,
					      vec_rhs1, vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  else
	    {
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	      else
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
						vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (bitop2 != NOP_EXPR)
		{
		  tree res = make_ssa_name (mask);
		  if (bitop2 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
		  else
		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						    new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
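/* For example, the scalar mask-producing statement

       m_1 = a_2 < b_3;

   becomes a single vector statement

       m_v = a_v < b_v;

   yielding a boolean vector of type MASK_TYPE, unless the operands
   are themselves masks, in which case the bit operations chosen
   above are emitted instead of the comparison.  */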
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

static bool
can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, gimple **vec_stmt)
{
  if (slp_node)
    {
      gimple *slp_stmt;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
	{
	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
					       vec_stmt))
	    return false;
	}
    }
  else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
    return false;

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
		   slp_instance node_instance)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */
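  /* For instance, the widening-multiplication pattern replaces

	 s_1 = (int) a_2;
	 t_3 = (int) b_4;
	 p_5 = s_1 * t_3;

     with a pattern statement "p' = a_2 w* b_4" (WIDEN_MULT_EXPR); when
     the original product is otherwise irrelevant, only the pattern
     statement is analyzed in its place.  */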
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
			      node_instance))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node, node_instance))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
	  || vectorizable_induction (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else if (bb_vinfo)
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and there vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
				     slp_node_instance);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
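/* As an illustration, on a hypothetical target whose preferred SIMD
   mode for SImode is V4SImode, a SIZE of zero yields "vector(4) int",
   while an explicit SIZE of 32 bytes would instead look for a
   V8SImode vector mode and return NULL_TREE if the target provides
   no such mode.  */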
poly_uint64 current_vector_size;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && known_eq (current_vector_size, 0U))
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
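/* E.g. for a 16-byte VECTOR_TYPE "vector(4) int" and SCALAR_TYPE
   "short", this returns "vector(8) short int": the element type
   changes but the total vector size is preserved.  */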
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
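/* As an example, analyzing "x_1 + 3" calls vect_is_simple_use once
   per operand: for the constant 3 it returns vect_constant_def with
   no defining statement, while for x_1 it looks up SSA_NAME_DEF_STMT
   and classifies the use as internal, external, induction, etc.
   according to the def statement's stmt_vec_info.  */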
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such an example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
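/* Worked example: widening chars to ints from a vector of 16 chars
   needs one intermediate step.  The first VEC_UNPACK_LO/HI_EXPR pair
   produces two vectors of shorts, and applying the pair again to each
   of those produces four vectors of ints; on success *MULTI_STEP_CVT
   is 1 and *INTERM_TYPES contains the vector-of-short type.  */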
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
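/* Worked example: narrowing ints to chars is the mirror image of the
   widening case above.  Two VEC_PACK_TRUNC_EXPRs combine four vectors
   of ints into two vectors of shorts, and one more pack yields a
   single vector of chars; *MULTI_STEP_CVT is 1 and *INTERM_TYPES
   holds the vector-of-short type.  */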
/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
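/* E.g. for an 8-lane mask, START_INDEX 5 and END_INDEX 8, the
   generated .WHILE_ULT call sets MASK to {1, 1, 1, 0, 0, 0, 0, 0}:
   lane I is active iff 5 + I < 8.  This is how fully-masked loops
   deactivate the excess lanes of the final iteration.  */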
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}