/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

static tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

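/* The value returned above is only a rough estimate: COUNT times the
   target's builtin_vectorization_cost for KIND.  For example, recording two
   copies of an unaligned_store yields twice the target's single
   unaligned-store cost.  The authoritative cost is computed later, when the
   entries pushed onto BODY_COST_VEC are fed to the target cost model.  */
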
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}

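/* Note that vect_mark_relevant only pushes STMT_INFO when its relevance or
   liveness actually increased; both properties are monotone, so the
   worklist loop in vect_mark_stmts_to_be_vectorized below is guaranteed to
   terminate.  */
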
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }

  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- STMT_INFO is an assignment - a load: x = *_ref
     -2- STMT_INFO is an assignment - a store: *_ref = x
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  def_bb = gimple_bb (dstmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     DSTMT_VINFO must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
     check that everything is as expected, and we are done.  */
  bb = gimple_bb (stmt_vinfo->stmt);
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}

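/* In short: uses that only feed address computation are ignored (case 1),
   the reduction-phi / reduction-stmt cycle is not re-entered (case 2), and
   for cross-nest uses the relevance is translated to the defining loop
   (cases 3a/3b) before the def is added to the worklist.  */
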
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}

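/* For example, with external (loop-invariant) operands the vector is costed
   as a single scalar_to_vec splat when all of its elements turn out to be
   the same value, and as a vec_construct otherwise; constant operands are
   costed as a vector_load from the constant pool.  */
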
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
				       vec_promote_demote, stmt_info, 0,
				       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

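/* As a worked example, a two-step promotion (PWR == 1) records
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote statements,
   while the corresponding two-step demotion records only 1 + 2 = 3,
   reflecting that widening doubles and narrowing halves the number of
   vectors at each step.  */
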
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (first_stmt_info)
		      - DR_GROUP_GAP (first_stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}

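/* For example, initializing a vector operand from a loop-invariant scalar X
   emits a statement of the form cst_N = {X, X, ..., X} in the loop
   preheader (or before GSI when one is given) and returns the SSA name
   cst_N for use in the vectorized statement.  */
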
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
   with type DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
				enum vect_def_type dt)
{
  tree vec_oprnd;
  stmt_vec_info vec_stmt_info;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* Operand is defined by a loop header phi.  In case of nested
       cycles we also may have uses of the backedge def.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
		  || dt == vect_nested_cycle);
      /* Fallthru.  */

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt_info
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt_info = (STMT_VINFO_VEC_STMT
			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt_info);
	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
	  vec_oprnd = PHI_RESULT (phi);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a (vector) def
   that will be used in the vectorized stmt for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_def_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
  if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);

  return vec_stmt_info;
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
  gsi_replace (&gsi, vec_stmt, true);

  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}

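/* For example, when a vectorized store is inserted before an existing
   statement that carries a virtual use, the code above gives the new store
   a fresh virtual definition and redirects that virtual use to it, keeping
   virtual SSA form correct without invoking the SSA renamer.  */
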
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}

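/* For example, a call to the sqrt built-in has IFN_SQRT as its associated
   internal function, so it can be vectorized directly whenever the target
   supports that internal function for the chosen vector types; otherwise
   IFN_LAST is returned and the caller must fall back to another strategy
   (e.g. a target-specific vectorized builtin).  */
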
static tree
permute_vec_elements (tree, tree, tree, stmt_vec_info,
		      gimple_stmt_iterator *);

1872 /* Check whether a load or store statement in the loop described by
1873 LOOP_VINFO is possible in a fully-masked loop. This is testing
1874 whether the vectorizer pass has the appropriate support, as well as
1875 whether the target does.
1877 VLS_TYPE says whether the statement is a load or store and VECTYPE
1878 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1879 says how the load or store is going to be implemented and GROUP_SIZE
1880 is the number of load or store statements in the containing group.
1881 If the access is a gather load or scatter store, GS_INFO describes
1884 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1885 supported, otherwise record the required mask types. */
1888 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1889 vec_load_store_type vls_type
, int group_size
,
1890 vect_memory_access_type memory_access_type
,
1891 gather_scatter_info
*gs_info
)
1893   /* Invariant loads need no special support.  */
1894   if (memory_access_type == VMAT_INVARIANT)
1897   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1898   machine_mode vecmode = TYPE_MODE (vectype);
1899   bool is_load = (vls_type == VLS_LOAD);
1900   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1903           ? !vect_load_lanes_supported (vectype, group_size, true)
1904           : !vect_store_lanes_supported (vectype, group_size, true))
1906           if (dump_enabled_p ())
1907             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908                              "can't use a fully-masked loop because the"
1909                              " target doesn't have an appropriate masked"
1910                              " load/store-lanes instruction.\n");
1911           LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1914       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1915       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1919   if (memory_access_type == VMAT_GATHER_SCATTER)
1921       internal_fn ifn = (is_load
1922                          ? IFN_MASK_GATHER_LOAD
1923                          : IFN_MASK_SCATTER_STORE);
1924       tree offset_type = TREE_TYPE (gs_info->offset);
1925       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1926                                                    gs_info->memory_type,
1927                                                    TYPE_SIGN (offset_type),
1930           if (dump_enabled_p ())
1931             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932                              "can't use a fully-masked loop because the"
1933                              " target doesn't have an appropriate masked"
1934                              " gather load or scatter store instruction.\n");
1935           LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1938       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1939       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1943   if (memory_access_type != VMAT_CONTIGUOUS
1944       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1946       /* Element X of the data must come from iteration i * VF + X of the
1947          scalar loop.  We need more work to support other mappings.  */
1948       if (dump_enabled_p ())
1949         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1950                          "can't use a fully-masked loop because an access"
1951                          " isn't contiguous.\n");
1952       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1956   machine_mode mask_mode;
1957   if (!(targetm.vectorize.get_mask_mode
1958         (GET_MODE_NUNITS (vecmode),
1959          GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1960       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1962       if (dump_enabled_p ())
1963         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1964                          "can't use a fully-masked loop because the target"
1965                          " doesn't have the appropriate masked load or"
1967       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1970   /* We might load more scalars than we need for permuting SLP loads.
1971      We checked in get_group_load_store_type that the extra elements
1972      don't leak into a new vector.  */
1973   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1974   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1975   unsigned int nvectors;
1976   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1977     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
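/* Note: can_div_away_from_zero_p computes NVECTORS as GROUP_SIZE * VF
   divided by NUNITS, rounded away from zero - i.e. the number of vector
   masks needed to cover one group's worth of elements per iteration.  */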
1982 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1983    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1984    that needs to be applied to all loads and stores in a vectorized loop.
1985    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1987    MASK_TYPE is the type of both masks.  If new statements are needed,
1988    insert them before GSI.  */
1991 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1992                          gimple_stmt_iterator *gsi)
1994   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1998   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1999   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
2000   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
2001                                           vec_mask, loop_mask);
2002   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
2006 /* Determine whether we can use a gather load or scatter store to vectorize
2007    strided load or store STMT_INFO by truncating the current offset to a
2008    smaller width.  We need to be able to construct an offset vector:
2010      { 0, X, X*2, X*3, ... }
2012    without loss of precision, where X is STMT_INFO's DR_STEP.
2014    Return true if this is possible, describing the gather load or scatter
2015    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
2018 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2019                                      loop_vec_info loop_vinfo, bool masked_p,
2020                                      gather_scatter_info *gs_info)
2022   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2023   data_reference *dr = dr_info->dr;
2024   tree step = DR_STEP (dr);
2025   if (TREE_CODE (step) != INTEGER_CST)
2027       /* ??? Perhaps we could use range information here?  */
2028       if (dump_enabled_p ())
2029         dump_printf_loc (MSG_NOTE, vect_location,
2030                          "cannot truncate variable step.\n");
2034   /* Get the number of bits in an element.  */
2035   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2036   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2037   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2039   /* Set COUNT to the upper limit on the number of elements - 1.
2040      Start with the maximum vectorization factor.  */
2041   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2043   /* Try lowering COUNT to the number of scalar latch iterations.  */
2044   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2045   widest_int max_iters;
2046   if (max_loop_iterations (loop, &max_iters)
2047       && max_iters < count)
2048     count = max_iters.to_shwi ();
2050   /* Try scales of 1 and the element size.  */
2051   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2052   wi::overflow_type overflow = wi::OVF_NONE;
2053   for (int i = 0; i < 2; ++i)
2055       int scale = scales[i];
2057       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2060       /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2061          in OFFSET_BITS bits.  */
2062       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2065       signop sign = range >= 0 ? UNSIGNED : SIGNED;
2066       if (wi::min_precision (range, sign) > element_bits)
2068           overflow = wi::OVF_UNKNOWN;
2072       /* See whether the target supports the operation.  */
2073       tree memory_type = TREE_TYPE (DR_REF (dr));
2074       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2075                                      memory_type, element_bits, sign, scale,
2076                                      &gs_info->ifn, &gs_info->element_type))
2079       tree offset_type = build_nonstandard_integer_type (element_bits,
2082       gs_info->decl = NULL_TREE;
2083       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2084          but we don't need to store that here.  */
2085       gs_info->base = NULL_TREE;
2086       gs_info->offset = fold_convert (offset_type, step);
2087       gs_info->offset_dt = vect_constant_def;
2088       gs_info->offset_vectype = NULL_TREE;
2089       gs_info->scale = scale;
2090       gs_info->memory_type = memory_type;
2094   if (overflow && dump_enabled_p ())
2095     dump_printf_loc (MSG_NOTE, vect_location,
2096                      "truncating gather/scatter offset to %d bits"
2097                      " might change its value.\n", element_bits);
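/* Worked example of the precision check above (assumed values, purely
   illustrative): with 32-bit elements (ELEMENT_BITS == 32), DR_STEP == 24
   and SCALE == 4, FACTOR is 6, so the check asks whether COUNT * 6 can
   still be represented in 32 bits with the chosen sign; if not, OVERFLOW
   is recorded and the remaining scale is considered instead.  */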
2102 /* Return true if we can use gather/scatter internal functions to
2103    vectorize STMT_INFO, which is a grouped or strided load or store.
2104    MASKED_P is true if load or store is conditional.  When returning
2105    true, fill in GS_INFO with the information required to perform the
2109 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2110                                     loop_vec_info loop_vinfo, bool masked_p,
2111                                     gather_scatter_info *gs_info)
2113   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2115     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2118   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2119   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2120   tree offset_type = TREE_TYPE (gs_info->offset);
2121   unsigned int offset_bits = TYPE_PRECISION (offset_type);
2123   /* Enforced by vect_check_gather_scatter.  */
2124   gcc_assert (element_bits >= offset_bits);
2126   /* If the elements are wider than the offset, convert the offset to the
2127      same width, without changing its sign.  */
2128   if (element_bits > offset_bits)
2130       bool unsigned_p = TYPE_UNSIGNED (offset_type);
2131       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2132       gs_info->offset = fold_convert (offset_type, gs_info->offset);
2135   if (dump_enabled_p ())
2136     dump_printf_loc (MSG_NOTE, vect_location,
2137                      "using gather/scatter for strided/grouped access,"
2138                      " scale = %d\n", gs_info->scale);
2143 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2144    elements with a known constant step.  Return -1 if that step
2145    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
2148 compare_step_with_zero (stmt_vec_info stmt_info)
2150   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2151   return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2155 /* If the target supports a permute mask that reverses the elements in
2156    a vector of type VECTYPE, return that mask, otherwise return null.  */
2159 perm_mask_for_reverse (tree vectype)
2161   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2163   /* The encoding has a single stepped pattern.  */
2164   vec_perm_builder sel (nunits, 1, 3);
2165   for (int i = 0; i < 3; ++i)
2166     sel.quick_push (nunits - 1 - i);
2168   vec_perm_indices indices (sel, 1, nunits);
2169   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2171   return vect_gen_perm_mask_checked (vectype, indices);
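/* For example, for a four-element vector the three encoded elements are
   { 3, 2, 1 }, which the stepped encoding extends to the full reversal
   selector { 3, 2, 1, 0 }.  */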
2174 /* STMT_INFO is either a masked or unconditional store.  Return the value
2178 vect_get_store_rhs (stmt_vec_info stmt_info)
2180   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2182       gcc_assert (gimple_assign_single_p (assign));
2183       return gimple_assign_rhs1 (assign);
2185   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2187       internal_fn ifn = gimple_call_internal_fn (call);
2188       int index = internal_fn_stored_value_index (ifn);
2189       gcc_assert (index >= 0);
2190       return gimple_call_arg (call, index);
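/* Note: for internal-function stores the stored value is located via
   internal_fn_stored_value_index; e.g. for IFN_MASK_STORE it is the last
   call argument (an assumption about the IFN argument layout, stated here
   only as an illustration).  */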
2195 /* A subroutine of get_load_store_type, with a subset of the same
2196    arguments.  Handle the case where STMT_INFO is part of a grouped load
2199    For stores, the statements in the group are all consecutive
2200    and there is no gap at the end.  For loads, the statements in the
2201    group might not be consecutive; there can be gaps between statements
2202    as well as at the end.  */
2205 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2206                            bool masked_p, vec_load_store_type vls_type,
2207                            vect_memory_access_type *memory_access_type,
2208                            gather_scatter_info *gs_info)
2210   vec_info *vinfo = stmt_info->vinfo;
2211   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2212   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2213   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2214   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2215   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2216   bool single_element_p = (stmt_info == first_stmt_info
2217                            && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2218   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2219   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2221   /* True if the vectorized statements would access beyond the last
2222      statement in the group.  */
2223   bool overrun_p = false;
2225   /* True if we can cope with such overrun by peeling for gaps, so that
2226      there is at least one final scalar iteration after the vector loop.  */
2227   bool can_overrun_p = (!masked_p
2228                         && vls_type == VLS_LOAD
2232   /* There can only be a gap at the end of the group if the stride is
2233      known at compile time.  */
2234   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2236   /* Stores can't yet have gaps.  */
2237   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2241       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2243           /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2244              separated by the stride, until we have a complete vector.
2245              Fall back to scalar accesses if that isn't possible.  */
2246           if (multiple_p (nunits, group_size))
2247             *memory_access_type = VMAT_STRIDED_SLP;
2249             *memory_access_type = VMAT_ELEMENTWISE;
2253           overrun_p = loop_vinfo && gap != 0;
2254           if (overrun_p && vls_type != VLS_LOAD)
2256               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2257                                "Grouped store with gaps requires"
2258                                " non-consecutive accesses\n");
2261           /* An overrun is fine if the trailing elements are smaller
2262              than the alignment boundary B.  Every vector access will
2263              be a multiple of B and so we are guaranteed to access a
2264              non-gap element in the same B-sized block.  */
2266               && gap < (vect_known_alignment_in_bytes (first_dr_info)
2267                         / vect_get_scalar_dr_size (first_dr_info)))
2270           /* If the gap splits the vector in half and the target
2271              can do half-vector operations avoid the epilogue peeling
2272              by simply loading half of the vector only.  Usually
2273              the construction with an upper zero half will be elided.  */
2274           dr_alignment_support alignment_support_scheme;
2275           scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2279               && (((alignment_support_scheme
2280                       = vect_supportable_dr_alignment (first_dr_info, false)))
2282                   || alignment_support_scheme == dr_unaligned_supported)
2283               && known_eq (nunits, (group_size - gap) * 2)
2284               && known_eq (nunits, group_size)
2285               && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
2286               && VECTOR_MODE_P (vmode)
2287               && targetm.vector_mode_supported_p (vmode)
2288               && (convert_optab_handler (vec_init_optab,
2289                                          TYPE_MODE (vectype), vmode)
2290                   != CODE_FOR_nothing))
2293       if (overrun_p && !can_overrun_p)
2295           if (dump_enabled_p ())
2296             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297                              "Peeling for outer loop is not supported\n");
2300       *memory_access_type = VMAT_CONTIGUOUS;
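/* Illustration of the half-vector case above (assumed values, based only
   on the conditions checked): with GROUP_SIZE == NUNITS == 4 and GAP == 2,
   only the first two elements are live, so the access can be done with a
   two-element vector mode and then widened with a zero upper half instead
   of peeling for gaps.  */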
2305       /* We can always handle this case using elementwise accesses,
2306          but see if something more efficient is available.  */
2307       *memory_access_type = VMAT_ELEMENTWISE;
2309       /* If there is a gap at the end of the group then these optimizations
2310          would access excess elements in the last iteration.  */
2311       bool would_overrun_p = (gap != 0);
2312       /* An overrun is fine if the trailing elements are smaller than the
2313          alignment boundary B.  Every vector access will be a multiple of B
2314          and so we are guaranteed to access a non-gap element in the
2315          same B-sized block.  */
2318           && gap < (vect_known_alignment_in_bytes (first_dr_info)
2319                     / vect_get_scalar_dr_size (first_dr_info)))
2320         would_overrun_p = false;
2322       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2323           && (can_overrun_p || !would_overrun_p)
2324           && compare_step_with_zero (stmt_info) > 0)
2326           /* First cope with the degenerate case of a single-element
2328           if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2329             *memory_access_type = VMAT_CONTIGUOUS;
2331           /* Otherwise try using LOAD/STORE_LANES.  */
2332           if (*memory_access_type == VMAT_ELEMENTWISE
2333               && (vls_type == VLS_LOAD
2334                   ? vect_load_lanes_supported (vectype, group_size, masked_p)
2335                   : vect_store_lanes_supported (vectype, group_size,
2338               *memory_access_type = VMAT_LOAD_STORE_LANES;
2339               overrun_p = would_overrun_p;
2342           /* If that fails, try using permuting loads.  */
2343           if (*memory_access_type == VMAT_ELEMENTWISE
2344               && (vls_type == VLS_LOAD
2345                   ? vect_grouped_load_supported (vectype, single_element_p,
2347                   : vect_grouped_store_supported (vectype, group_size)))
2349               *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2350               overrun_p = would_overrun_p;
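/* The candidates above are tried in order: a single-element group is just
   a contiguous access, then LOAD/STORE_LANES, then a contiguous access
   combined with an explicit permutation.  Each later attempt only runs if
   the access type is still VMAT_ELEMENTWISE.  */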
2354       /* As a last resort, try using a gather load or scatter store.
2356          ??? Although the code can handle all group sizes correctly,
2357          it probably isn't a win to use separate strided accesses based
2358          on nearby locations.  Or, even if it's a win over scalar code,
2359          it might not be a win over vectorizing at a lower VF, if that
2360          allows us to use contiguous accesses.  */
2361       if (*memory_access_type == VMAT_ELEMENTWISE
2364           && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2366         *memory_access_type = VMAT_GATHER_SCATTER;
2369   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2371       /* STMT is the leader of the group.  Check the operands of all the
2372          stmts of the group.  */
2373       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2374       while (next_stmt_info)
2376           tree op = vect_get_store_rhs (next_stmt_info);
2377           enum vect_def_type dt;
2378           if (!vect_is_simple_use (op, vinfo, &dt))
2380               if (dump_enabled_p ())
2381                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2382                                  "use not simple.\n");
2385           next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2391       gcc_assert (can_overrun_p);
2392       if (dump_enabled_p ())
2393         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394                          "Data access with gaps requires scalar "
2396       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
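/* Note: as the earlier comment says, peeling for gaps guarantees at least
   one final scalar iteration after the vector loop, so an overrunning
   vector access never has to read past the data that the scalar loop
   would itself have touched.  */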
2402 /* A subroutine of get_load_store_type, with a subset of the same
2403    arguments.  Handle the case where STMT_INFO is a load or store that
2404    accesses consecutive elements with a negative step.  */
2406 static vect_memory_access_type
2407 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2408                               vec_load_store_type vls_type,
2409                               unsigned int ncopies)
2411   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2412   dr_alignment_support alignment_support_scheme;
2416       if (dump_enabled_p ())
2417         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418                          "multiple types with negative step.\n");
2419       return VMAT_ELEMENTWISE;
2422   alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2423   if (alignment_support_scheme != dr_aligned
2424       && alignment_support_scheme != dr_unaligned_supported)
2426       if (dump_enabled_p ())
2427         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428                          "negative step but alignment required.\n");
2429       return VMAT_ELEMENTWISE;
2432   if (vls_type == VLS_STORE_INVARIANT)
2434       if (dump_enabled_p ())
2435         dump_printf_loc (MSG_NOTE, vect_location,
2436                          "negative step with invariant source;"
2437                          " no permute needed.\n");
2438       return VMAT_CONTIGUOUS_DOWN;
2441   if (!perm_mask_for_reverse (vectype))
2443       if (dump_enabled_p ())
2444         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2445                          "negative step and reversing not supported.\n");
2446       return VMAT_ELEMENTWISE;
2449   return VMAT_CONTIGUOUS_REVERSE;
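/* Note: VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE both describe a
   contiguous access at decreasing addresses; the difference is that
   _REVERSE additionally reverses the vector elements (hence the
   perm_mask_for_reverse check), which is unnecessary when the stored
   value is invariant.  */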
2452 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2453 if there is a memory access type that the vectorized form can use,
2454 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2455 or scatters, fill in GS_INFO accordingly.
2457 SLP says whether we're performing SLP rather than loop vectorization.
2458 MASKED_P is true if the statement is conditional on a vectorized mask.
2459 VECTYPE is the vector type that the vectorized statements will use.
2460 NCOPIES is the number of vector statements that will be needed. */
2463 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2464 bool masked_p
, vec_load_store_type vls_type
,
2465 unsigned int ncopies
,
2466 vect_memory_access_type
*memory_access_type
,
2467 gather_scatter_info
*gs_info
)
2469 vec_info
*vinfo
= stmt_info
->vinfo
;
2470 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2471 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2474 *memory_access_type
= VMAT_GATHER_SCATTER
;
2475 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2477 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2478 &gs_info
->offset_dt
,
2479 &gs_info
->offset_vectype
))
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2483 "%s index use not simple.\n",
2484 vls_type
== VLS_LOAD
? "gather" : "scatter");
2488 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2490 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2491 vls_type
, memory_access_type
, gs_info
))
2494 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2498 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2500 *memory_access_type
= VMAT_GATHER_SCATTER
;
2502 *memory_access_type
= VMAT_ELEMENTWISE
;
2506 int cmp
= compare_step_with_zero (stmt_info
);
2508 *memory_access_type
= get_negative_load_store_type
2509 (stmt_info
, vectype
, vls_type
, ncopies
);
2512 gcc_assert (vls_type
== VLS_LOAD
);
2513 *memory_access_type
= VMAT_INVARIANT
;
2516 *memory_access_type
= VMAT_CONTIGUOUS
;
2519 if ((*memory_access_type
== VMAT_ELEMENTWISE
2520 || *memory_access_type
== VMAT_STRIDED_SLP
)
2521 && !nunits
.is_constant ())
2523 if (dump_enabled_p ())
2524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2525 "Not using elementwise accesses due to variable "
2526 "vectorization factor.\n");
2530 /* FIXME: At the moment the cost model seems to underestimate the
2531 cost of using elementwise accesses. This check preserves the
2532 traditional behavior until that can be fixed. */
2533 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2534 if (!first_stmt_info
)
2535 first_stmt_info
= stmt_info
;
2536 if (*memory_access_type
== VMAT_ELEMENTWISE
2537 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2538 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2539 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2540 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2544 "not falling back to elementwise accesses\n");
2550 /* Return true if boolean argument MASK is suitable for vectorizing
2551 conditional load or store STMT_INFO. When returning true, store the type
2552 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2553 in *MASK_VECTYPE_OUT. */
2556 vect_check_load_store_mask (stmt_vec_info stmt_info
, tree mask
,
2557 vect_def_type
*mask_dt_out
,
2558 tree
*mask_vectype_out
)
2560 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2564 "mask argument is not a boolean.\n");
2568 if (TREE_CODE (mask
) != SSA_NAME
)
2570 if (dump_enabled_p ())
2571 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2572 "mask argument is not an SSA name.\n");
2576 enum vect_def_type mask_dt
;
2578 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2582 "mask use not simple.\n");
2586 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2588 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2590 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2594 "could not find an appropriate vector mask type.\n");
2598 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2599 TYPE_VECTOR_SUBPARTS (vectype
)))
2601 if (dump_enabled_p ())
2602 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2603 "vector mask type %T"
2604 " does not match vector data type %T.\n",
2605 mask_vectype
, vectype
);
2610 *mask_dt_out
= mask_dt
;
2611 *mask_vectype_out
= mask_vectype
;
2615 /* Return true if stored value RHS is suitable for vectorizing store
2616 statement STMT_INFO. When returning true, store the type of the
2617 definition in *RHS_DT_OUT, the type of the vectorized store value in
2618 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2621 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2622 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2623 vec_load_store_type
*vls_type_out
)
2625 /* In the case this is a store from a constant make sure
2626 native_encode_expr can handle it. */
2627 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2629 if (dump_enabled_p ())
2630 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2631 "cannot encode constant as a byte sequence.\n");
2635 enum vect_def_type rhs_dt
;
2637 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2641 "use not simple.\n");
2645 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2646 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2648 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2650 "incompatible vector types.\n");
2654 *rhs_dt_out
= rhs_dt
;
2655 *rhs_vectype_out
= rhs_vectype
;
2656 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2657 *vls_type_out
= VLS_STORE_INVARIANT
;
2659 *vls_type_out
= VLS_STORE
;
2663 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2664 Note that we support masks with floating-point type, in which case the
2665 floats are interpreted as a bitmask. */
2668 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
2670 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2671 return build_int_cst (masktype
, -1);
2672 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2674 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2675 mask
= build_vector_from_val (masktype
, mask
);
2676 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2678 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2682 for (int j
= 0; j
< 6; ++j
)
2684 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2685 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2686 mask
= build_vector_from_val (masktype
, mask
);
2687 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2692 /* Build an all-zero merge value of type VECTYPE while vectorizing
2693 STMT_INFO as a gather load. */
2696 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
2699 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2700 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2701 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2705 for (int j
= 0; j
< 6; ++j
)
2707 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2708 merge
= build_real (TREE_TYPE (vectype
), r
);
2712 merge
= build_vector_from_val (vectype
, merge
);
2713 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
2716 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2717 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2718 the gather load operation. If the load is conditional, MASK is the
2719 unvectorized condition and MASK_DT is its definition type, otherwise
2723 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2724 gimple_stmt_iterator
*gsi
,
2725 stmt_vec_info
*vec_stmt
,
2726 gather_scatter_info
*gs_info
,
2729 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2730 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2731 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2732 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2733 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2734 edge pe
= loop_preheader_edge (loop
);
2735 enum { NARROW
, NONE
, WIDEN
} modifier
;
2736 poly_uint64 gather_off_nunits
2737 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2739 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2740 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2741 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2742 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2743 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2744 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2745 tree scaletype
= TREE_VALUE (arglist
);
2746 tree real_masktype
= masktype
;
2747 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2749 || TREE_CODE (masktype
) == INTEGER_TYPE
2750 || types_compatible_p (srctype
, masktype
)));
2751 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2752 masktype
= build_same_sized_truth_vector_type (srctype
);
2754 tree mask_halftype
= masktype
;
2755 tree perm_mask
= NULL_TREE
;
2756 tree mask_perm_mask
= NULL_TREE
;
2757 if (known_eq (nunits
, gather_off_nunits
))
2759 else if (known_eq (nunits
* 2, gather_off_nunits
))
2763 /* Currently widening gathers and scatters are only supported for
2764 fixed-length vectors. */
2765 int count
= gather_off_nunits
.to_constant ();
2766 vec_perm_builder
sel (count
, count
, 1);
2767 for (int i
= 0; i
< count
; ++i
)
2768 sel
.quick_push (i
| (count
/ 2));
2770 vec_perm_indices
indices (sel
, 1, count
);
2771 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2774 else if (known_eq (nunits
, gather_off_nunits
* 2))
2778 /* Currently narrowing gathers and scatters are only supported for
2779 fixed-length vectors. */
2780 int count
= nunits
.to_constant ();
2781 vec_perm_builder
sel (count
, count
, 1);
2782 sel
.quick_grow (count
);
2783 for (int i
= 0; i
< count
; ++i
)
2784 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2785 vec_perm_indices
indices (sel
, 2, count
);
2786 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2790 if (mask
&& masktype
== real_masktype
)
2792 for (int i
= 0; i
< count
; ++i
)
2793 sel
[i
] = i
| (count
/ 2);
2794 indices
.new_vector (sel
, 2, count
);
2795 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2799 = build_same_sized_truth_vector_type (gs_info
->offset_vectype
);
2804 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2805 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2807 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2808 if (!is_gimple_min_invariant (ptr
))
2811 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2812 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2813 gcc_assert (!new_bb
);
2816 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2818 tree vec_oprnd0
= NULL_TREE
;
2819 tree vec_mask
= NULL_TREE
;
2820 tree src_op
= NULL_TREE
;
2821 tree mask_op
= NULL_TREE
;
2822 tree prev_res
= NULL_TREE
;
2823 stmt_vec_info prev_stmt_info
= NULL
;
2827 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2828 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
2831 for (int j
= 0; j
< ncopies
; ++j
)
2834 if (modifier
== WIDEN
&& (j
& 1))
2835 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2836 perm_mask
, stmt_info
, gsi
);
2839 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2841 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2844 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2846 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2847 TYPE_VECTOR_SUBPARTS (idxtype
)));
2848 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2849 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2850 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2851 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2857 if (mask_perm_mask
&& (j
& 1))
2858 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2859 mask_perm_mask
, stmt_info
, gsi
);
2863 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2864 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2865 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2869 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2871 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2872 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2873 gcc_assert (known_eq (sub1
, sub2
));
2874 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2875 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2877 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2878 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2882 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2884 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2886 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2887 : VEC_UNPACK_LO_EXPR
,
2889 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2895 tree mask_arg
= mask_op
;
2896 if (masktype
!= real_masktype
)
2898 tree utype
, optype
= TREE_TYPE (mask_op
);
2899 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2900 utype
= real_masktype
;
2902 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2903 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2904 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2906 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2907 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2909 if (!useless_type_conversion_p (real_masktype
, utype
))
2911 gcc_assert (TYPE_PRECISION (utype
)
2912 <= TYPE_PRECISION (real_masktype
));
2913 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2914 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2915 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2918 src_op
= build_zero_cst (srctype
);
2920 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2923 stmt_vec_info new_stmt_info
;
2924 if (!useless_type_conversion_p (vectype
, rettype
))
2926 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2927 TYPE_VECTOR_SUBPARTS (rettype
)));
2928 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2929 gimple_call_set_lhs (new_call
, op
);
2930 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2931 var
= make_ssa_name (vec_dest
);
2932 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2933 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2935 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2939 var
= make_ssa_name (vec_dest
, new_call
);
2940 gimple_call_set_lhs (new_call
, var
);
2942 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2945 if (modifier
== NARROW
)
2952 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2954 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2957 if (prev_stmt_info
== NULL
)
2958 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2960 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2961 prev_stmt_info
= new_stmt_info
;
2965 /* Prepare the base and offset in GS_INFO for vectorization.
2966 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2967 to the vectorized offset argument for the first copy of STMT_INFO.
2968 STMT_INFO is the statement described by GS_INFO and LOOP is the
2972 vect_get_gather_scatter_ops (class loop
*loop
, stmt_vec_info stmt_info
,
2973 gather_scatter_info
*gs_info
,
2974 tree
*dataref_ptr
, tree
*vec_offset
)
2976 gimple_seq stmts
= NULL
;
2977 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2981 edge pe
= loop_preheader_edge (loop
);
2982 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2983 gcc_assert (!new_bb
);
2985 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2986 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2987 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2991 /* Prepare to implement a grouped or strided load or store using
2992 the gather load or scatter store operation described by GS_INFO.
2993 STMT_INFO is the load or store statement.
2995 Set *DATAREF_BUMP to the amount that should be added to the base
2996 address after each copy of the vectorized statement. Set *VEC_OFFSET
2997 to an invariant offset vector in which element I has the value
2998 I * DR_STEP / SCALE. */
3001 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3002 loop_vec_info loop_vinfo
,
3003 gather_scatter_info
*gs_info
,
3004 tree
*dataref_bump
, tree
*vec_offset
)
3006 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3007 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3008 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3011 tree bump
= size_binop (MULT_EXPR
,
3012 fold_convert (sizetype
, DR_STEP (dr
)),
3013 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3014 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
3016 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3018 /* The offset given in GS_INFO can have pointer type, so use the element
3019 type of the vector instead. */
3020 tree offset_type
= TREE_TYPE (gs_info
->offset
);
3021 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
3022 offset_type
= TREE_TYPE (offset_vectype
);
3024 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3025 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
3026 ssize_int (gs_info
->scale
));
3027 step
= fold_convert (offset_type
, step
);
3028 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
3030 /* Create {0, X, X*2, X*3, ...}. */
3031 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
3032 build_zero_cst (offset_type
), step
);
3034 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3037 /* Return the amount that should be added to a vector pointer to move
3038    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3039    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3043 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3044                              vect_memory_access_type memory_access_type)
3046   if (memory_access_type == VMAT_INVARIANT)
3047     return size_zero_node;
3049   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3050   tree step = vect_dr_behavior (dr_info)->step;
3051   if (tree_int_cst_sgn (step) == -1)
3052     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
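/* Note: for a negative step the increment is simply the negated size of
   AGGR_TYPE, so the vector pointer walks backwards through memory by one
   full aggregate per copy.  */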
3056 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3059 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3060 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3061 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3064 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3065 vec_info
*vinfo
= stmt_info
->vinfo
;
3066 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3069 op
= gimple_call_arg (stmt
, 0);
3070 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3071 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3073 /* Multiple types in SLP are handled by creating the appropriate number of
3074 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3079 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3081 gcc_assert (ncopies
>= 1);
3083 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3087 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3088 unsigned word_bytes
;
3089 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3092   /* The encoding uses one stepped pattern for each byte in the word.  */
3093   vec_perm_builder elts (num_bytes, word_bytes, 3);
3094   for (unsigned i = 0; i < 3; ++i)
3095     for (unsigned j = 0; j < word_bytes; ++j)
3096       elts.quick_push ((i + 1) * word_bytes - j - 1);
3098   vec_perm_indices indices (elts, 1, num_bytes);
3099   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
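/* For example, for a 4-byte word (WORD_BYTES == 4) the pushed pattern
   expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. a
   byte reversal within each word of the vector.  */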
3104 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3105 DUMP_VECT_SCOPE ("vectorizable_bswap");
3108 record_stmt_cost (cost_vec
,
3109 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3110 record_stmt_cost (cost_vec
,
3111 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3116 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3119 vec
<tree
> vec_oprnds
= vNULL
;
3120 stmt_vec_info new_stmt_info
= NULL
;
3121 stmt_vec_info prev_stmt_info
= NULL
;
3122 for (unsigned j
= 0; j
< ncopies
; j
++)
3126 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
3128 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3130 /* Arguments are ready. create the new vector stmt. */
3133 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3136 tree tem
= make_ssa_name (char_vectype
);
3137 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3138 char_vectype
, vop
));
3139 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3140 tree tem2
= make_ssa_name (char_vectype
);
3141 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3142 tem
, tem
, bswap_vconst
);
3143 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3144 tem
= make_ssa_name (vectype
);
3145 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3148 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3150 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3157 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3159 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3161 prev_stmt_info
= new_stmt_info
;
3164 vec_oprnds
.release ();
3168 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3169    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3170    in a single step.  On success, store the binary pack code in
3174 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3175                           tree_code *convert_code)
3177   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3178       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3182   int multi_step_cvt = 0;
3183   auto_vec<tree, 8> interm_types;
3184   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3185                                         &code, &multi_step_cvt,
3190   *convert_code = code;
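/* Illustrative example (an assumption about a typical target, not taken
   from the code above): with VECTYPE_IN == V2DI and VECTYPE_OUT == V4SI,
   two V2DI results can be narrowed in one step when the target supports
   the corresponding VEC_PACK_TRUNC operation, and *CONVERT_CODE is set to
   that pack code.  */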
3194 /* Function vectorizable_call.
3196 Check if STMT_INFO performs a function call that can be vectorized.
3197 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3198 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3199 Return true if STMT_INFO is vectorizable in this way. */
3202 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3203 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3204 stmt_vector_for_cost
*cost_vec
)
3210 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3211 stmt_vec_info prev_stmt_info
;
3212 tree vectype_out
, vectype_in
;
3213 poly_uint64 nunits_in
;
3214 poly_uint64 nunits_out
;
3215 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3216 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3217 vec_info
*vinfo
= stmt_info
->vinfo
;
3218 tree fndecl
, new_temp
, rhs_type
;
3219 enum vect_def_type dt
[4]
3220 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3221 vect_unknown_def_type
};
3222 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3223 int ndts
= ARRAY_SIZE (dt
);
3225 auto_vec
<tree
, 8> vargs
;
3226 auto_vec
<tree
, 8> orig_vargs
;
3227 enum { NARROW
, NONE
, WIDEN
} modifier
;
3231 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3234 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3238 /* Is STMT_INFO a vectorizable call? */
3239 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3243 if (gimple_call_internal_p (stmt
)
3244 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3245 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3246 /* Handled by vectorizable_load and vectorizable_store. */
3249 if (gimple_call_lhs (stmt
) == NULL_TREE
3250 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3253 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3255 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3257 /* Process function arguments. */
3258 rhs_type
= NULL_TREE
;
3259 vectype_in
= NULL_TREE
;
3260 nargs
= gimple_call_num_args (stmt
);
3262 /* Bail out if the function has more than three arguments, we do not have
3263 interesting builtin functions to vectorize with more than two arguments
3264 except for fma. No arguments is also not good. */
3265 if (nargs
== 0 || nargs
> 4)
3268 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3269 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3270 if (cfn
== CFN_GOMP_SIMD_LANE
)
3273 rhs_type
= unsigned_type_node
;
3277 if (internal_fn_p (cfn
))
3278 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3280 for (i
= 0; i
< nargs
; i
++)
3282 op
= gimple_call_arg (stmt
, i
);
3283 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "use not simple.\n");
3291 /* Skip the mask argument to an internal function. This operand
3292 has been converted via a pattern if necessary. */
3293 if ((int) i
== mask_opno
)
3296 /* We can only handle calls with arguments of the same type. */
3298 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3300 if (dump_enabled_p ())
3301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3302 "argument types differ.\n");
3306 rhs_type
= TREE_TYPE (op
);
3309 vectype_in
= vectypes
[i
];
3310 else if (vectypes
[i
]
3311 && vectypes
[i
] != vectype_in
)
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3315 "argument vector types differ.\n");
3319 /* If all arguments are external or constant defs use a vector type with
3320 the same size as the output vector type. */
3322 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3324 gcc_assert (vectype_in
);
3327 if (dump_enabled_p ())
3328 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3329 "no vectype for scalar type %T\n", rhs_type
);
3335 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3336 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3337 if (known_eq (nunits_in
* 2, nunits_out
))
3339 else if (known_eq (nunits_out
, nunits_in
))
3341 else if (known_eq (nunits_out
* 2, nunits_in
))
3346 /* We only handle functions that do not read or clobber memory. */
3347 if (gimple_vuse (stmt
))
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3351 "function reads from or writes to memory.\n");
3355 /* For now, we only vectorize functions if a target specific builtin
3356 is available. TODO -- in some cases, it might be profitable to
3357 insert the calls for pieces of the vector, in order to be able
3358 to vectorize other operations in the loop. */
3360 internal_fn ifn
= IFN_LAST
;
3361 tree callee
= gimple_call_fndecl (stmt
);
3363 /* First try using an internal function. */
3364 tree_code convert_code
= ERROR_MARK
;
3366 && (modifier
== NONE
3367 || (modifier
== NARROW
3368 && simple_integer_narrowing (vectype_out
, vectype_in
,
3370 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3373 /* If that fails, try asking for a target-specific built-in function. */
3374 if (ifn
== IFN_LAST
)
3376 if (cfn
!= CFN_LAST
)
3377 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3378 (cfn
, vectype_out
, vectype_in
);
3380 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3381 (callee
, vectype_out
, vectype_in
);
3384 if (ifn
== IFN_LAST
&& !fndecl
)
3386 if (cfn
== CFN_GOMP_SIMD_LANE
3389 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3390 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3391 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3392 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3394 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3395 { 0, 1, 2, ... vf - 1 } vector. */
3396 gcc_assert (nargs
== 0);
3398 else if (modifier
== NONE
3399 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3400 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3401 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3402 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3403 vectype_in
, cost_vec
);
3406 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3408 "function is not vectorizable.\n");
3415 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3416 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3418 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3420 /* Sanity check: make sure that at least one copy of the vectorized stmt
3421 needs to be generated. */
3422 gcc_assert (ncopies
>= 1);
3424 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3425 if (!vec_stmt
) /* transformation not required. */
3427 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3428 DUMP_VECT_SCOPE ("vectorizable_call");
3429 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3430 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3431 record_stmt_cost (cost_vec
, ncopies
/ 2,
3432 vec_promote_demote
, stmt_info
, 0, vect_body
);
3434 if (loop_vinfo
&& mask_opno
>= 0)
3436 unsigned int nvectors
= (slp_node
3437 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3439 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
);
3446 if (dump_enabled_p ())
3447 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3450 scalar_dest
= gimple_call_lhs (stmt
);
3451 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3453 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3455 stmt_vec_info new_stmt_info
= NULL
;
3456 prev_stmt_info
= NULL
;
3457 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3459 tree prev_res
= NULL_TREE
;
3460 vargs
.safe_grow (nargs
);
3461 orig_vargs
.safe_grow (nargs
);
3462 for (j
= 0; j
< ncopies
; ++j
)
3464 /* Build argument list for the vectorized call. */
3467 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3468 vec
<tree
> vec_oprnds0
;
3470 for (i
= 0; i
< nargs
; i
++)
3471 vargs
[i
] = gimple_call_arg (stmt
, i
);
3472 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3473 vec_oprnds0
= vec_defs
[0];
3475 /* Arguments are ready. Create the new vector stmt. */
3476 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3479 for (k
= 0; k
< nargs
; k
++)
3481 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3482 vargs
[k
] = vec_oprndsk
[i
];
3484 if (modifier
== NARROW
)
3486 /* We don't define any narrowing conditional functions
3488 gcc_assert (mask_opno
< 0);
3489 tree half_res
= make_ssa_name (vectype_in
);
3491 = gimple_build_call_internal_vec (ifn
, vargs
);
3492 gimple_call_set_lhs (call
, half_res
);
3493 gimple_call_set_nothrow (call
, true);
3494 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3497 prev_res
= half_res
;
3500 new_temp
= make_ssa_name (vec_dest
);
3502 = gimple_build_assign (new_temp
, convert_code
,
3503 prev_res
, half_res
);
3505 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3510 if (mask_opno
>= 0 && masked_loop_p
)
3512 unsigned int vec_num
= vec_oprnds0
.length ();
3513 /* Always true for SLP. */
3514 gcc_assert (ncopies
== 1);
3515 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3517 vargs
[mask_opno
] = prepare_load_store_mask
3518 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3522 if (ifn
!= IFN_LAST
)
3523 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3525 call
= gimple_build_call_vec (fndecl
, vargs
);
3526 new_temp
= make_ssa_name (vec_dest
, call
);
3527 gimple_call_set_lhs (call
, new_temp
);
3528 gimple_call_set_nothrow (call
, true);
3530 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3532 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3535 for (i
= 0; i
< nargs
; i
++)
3537 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3538 vec_oprndsi
.release ();
3543 if (mask_opno
>= 0 && !vectypes
[mask_opno
])
3545 gcc_assert (modifier
!= WIDEN
);
3547 = build_same_sized_truth_vector_type (vectype_in
);
3550 for (i
= 0; i
< nargs
; i
++)
3552 op
= gimple_call_arg (stmt
, i
);
3555 = vect_get_vec_def_for_operand (op
, stmt_info
, vectypes
[i
]);
3558 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3560 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3563 if (mask_opno
>= 0 && masked_loop_p
)
3565 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3568 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3569 vargs
[mask_opno
], gsi
);
3572 if (cfn
== CFN_GOMP_SIMD_LANE
)
3574 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3576 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3577 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3578 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3579 new_temp
= make_ssa_name (vec_dest
);
3580 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3582 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3584 else if (modifier
== NARROW
)
3586 /* We don't define any narrowing conditional functions at
3588 gcc_assert (mask_opno
< 0);
3589 tree half_res
= make_ssa_name (vectype_in
);
3590 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3591 gimple_call_set_lhs (call
, half_res
);
3592 gimple_call_set_nothrow (call
, true);
3593 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3596 prev_res
= half_res
;
3599 new_temp
= make_ssa_name (vec_dest
);
3600 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3601 prev_res
, half_res
);
3603 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3608 if (ifn
!= IFN_LAST
)
3609 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3611 call
= gimple_build_call_vec (fndecl
, vargs
);
3612 new_temp
= make_ssa_name (vec_dest
, call
);
3613 gimple_call_set_lhs (call
, new_temp
);
3614 gimple_call_set_nothrow (call
, true);
3616 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3619 if (j
== (modifier
== NARROW
? 1 : 0))
3620 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3622 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3624 prev_stmt_info
= new_stmt_info
;
3627 else if (modifier
== NARROW
)
3629 /* We don't define any narrowing conditional functions at present. */
3630 gcc_assert (mask_opno
< 0);
3631 for (j
= 0; j
< ncopies
; ++j
)
3633 /* Build argument list for the vectorized call. */
3635 vargs
.create (nargs
* 2);
3641 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3642 vec
<tree
> vec_oprnds0
;
3644 for (i
= 0; i
< nargs
; i
++)
3645 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3646 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3647 vec_oprnds0
= vec_defs
[0];
3649 /* Arguments are ready. Create the new vector stmt. */
3650 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3654 for (k
= 0; k
< nargs
; k
++)
3656 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3657 vargs
.quick_push (vec_oprndsk
[i
]);
3658 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3661 if (ifn
!= IFN_LAST
)
3662 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3664 call
= gimple_build_call_vec (fndecl
, vargs
);
3665 new_temp
= make_ssa_name (vec_dest
, call
);
3666 gimple_call_set_lhs (call
, new_temp
);
3667 gimple_call_set_nothrow (call
, true);
3669 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3670 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3673 for (i
= 0; i
< nargs
; i
++)
3675 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3676 vec_oprndsi
.release ();
3681 for (i
= 0; i
< nargs
; i
++)
3683 op
= gimple_call_arg (stmt
, i
);
3687 = vect_get_vec_def_for_operand (op
, stmt_info
,
3690 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3694 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3697 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3699 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3702 vargs
.quick_push (vec_oprnd0
);
3703 vargs
.quick_push (vec_oprnd1
);
3706 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3707 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3708 gimple_call_set_lhs (new_stmt
, new_temp
);
3710 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3713 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3715 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3717 prev_stmt_info
= new_stmt_info
;
3720 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3723 /* No current target implements this case. */
3728 /* The call in STMT might prevent it from being removed in dce.
3729 We however cannot remove it here, due to the way the ssa name
3730 it defines is mapped to the new definition. So just replace
3731 rhs of the statement with something harmless. */
3736 stmt_info
= vect_orig_stmt (stmt_info
);
3737 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3740 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3741 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3747 struct simd_call_arg_info
3751 HOST_WIDE_INT linear_step
;
3752 enum vect_def_type dt
;
3754 bool simd_lane_linear
;
3757 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3758 is linear within simd lane (but not within whole loop), note it in
3762 vect_simd_lane_linear (tree op
, class loop
*loop
,
3763 struct simd_call_arg_info
*arginfo
)
3765 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3767 if (!is_gimple_assign (def_stmt
)
3768 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3769 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3772 tree base
= gimple_assign_rhs1 (def_stmt
);
3773 HOST_WIDE_INT linear_step
= 0;
3774 tree v
= gimple_assign_rhs2 (def_stmt
);
3775 while (TREE_CODE (v
) == SSA_NAME
)
3778 def_stmt
= SSA_NAME_DEF_STMT (v
);
3779 if (is_gimple_assign (def_stmt
))
3780 switch (gimple_assign_rhs_code (def_stmt
))
3783 t
= gimple_assign_rhs2 (def_stmt
);
3784 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3786 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3787 v
= gimple_assign_rhs1 (def_stmt
);
3790 t
= gimple_assign_rhs2 (def_stmt
);
3791 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3793 linear_step
= tree_to_shwi (t
);
3794 v
= gimple_assign_rhs1 (def_stmt
);
3797 t
= gimple_assign_rhs1 (def_stmt
);
3798 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3799 || (TYPE_PRECISION (TREE_TYPE (v
))
3800 < TYPE_PRECISION (TREE_TYPE (t
))))
3809 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3811 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3812 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3817 arginfo
->linear_step
= linear_step
;
3819 arginfo
->simd_lane_linear
= true;
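  /* An illustrative, assumed example (not from this file) of code that
     produces the pattern recognized above: inside a "#pragma omp simd"
     body the address of a lane-private variable is lowered to
     BASE + .GOMP_SIMD_LANE (simduid) * STEP, as in

	#pragma omp simd
	for (int i = 0; i < n; i++)
	  {
	    int t;
	    bar (&t);
	    a[i] = t;
	  }

     where "&t" becomes the address of a per-lane slot in a simd array.
     Such an address advances by a constant step within one simd lane even
     though it is not linear across the whole loop.  */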
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   number of elements.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
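/* For example, a clone argument whose vector_type is V8SI yields 8 here.
   Variable-length vector types never reach this helper, because the caller
   below gives up on SIMD clones when the vectorization factor is not a
   compile-time constant.  */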
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_simd_clone_call (stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      stmt_vec_info *vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
3852 tree vec_oprnd0
= NULL_TREE
;
3853 stmt_vec_info prev_stmt_info
;
3855 unsigned int nunits
;
3856 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3857 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3858 vec_info
*vinfo
= stmt_info
->vinfo
;
3859 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3860 tree fndecl
, new_temp
;
3862 auto_vec
<simd_call_arg_info
> arginfo
;
3863 vec
<tree
> vargs
= vNULL
;
3865 tree lhs
, rtype
, ratype
;
3866 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3873 fndecl
= gimple_call_fndecl (stmt
);
3874 if (fndecl
== NULL_TREE
)
3877 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3878 if (node
== NULL
|| node
->simd_clones
== NULL
)
3881 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3884 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3888 if (gimple_call_lhs (stmt
)
3889 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3892 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3894 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3896 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);
3912 for (i
= 0; i
< nargs
; i
++)
3914 simd_call_arg_info thisarginfo
;
3917 thisarginfo
.linear_step
= 0;
3918 thisarginfo
.align
= 0;
3919 thisarginfo
.op
= NULL_TREE
;
3920 thisarginfo
.simd_lane_linear
= false;
3922 op
= gimple_call_arg (stmt
, i
);
3923 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3924 &thisarginfo
.vectype
)
3925 || thisarginfo
.dt
== vect_uninitialized_def
)
3927 if (dump_enabled_p ())
3928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3929 "use not simple.\n");
3933 if (thisarginfo
.dt
== vect_constant_def
3934 || thisarginfo
.dt
== vect_external_def
)
3935 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3937 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3941 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3942 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3944 gcc_assert (vec_stmt
);
3945 thisarginfo
.linear_step
3946 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3948 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3949 thisarginfo
.simd_lane_linear
3950 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3951 == boolean_true_node
);
3952 /* If loop has been peeled for alignment, we need to adjust it. */
3953 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3954 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3955 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3957 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3958 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3959 tree opt
= TREE_TYPE (thisarginfo
.op
);
3960 bias
= fold_convert (TREE_TYPE (step
), bias
);
3961 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3963 = fold_build2 (POINTER_TYPE_P (opt
)
3964 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3965 thisarginfo
.op
, bias
);
3969 && thisarginfo
.dt
!= vect_constant_def
3970 && thisarginfo
.dt
!= vect_external_def
3972 && TREE_CODE (op
) == SSA_NAME
3973 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3975 && tree_fits_shwi_p (iv
.step
))
3977 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3978 thisarginfo
.op
= iv
.base
;
3980 else if ((thisarginfo
.dt
== vect_constant_def
3981 || thisarginfo
.dt
== vect_external_def
)
3982 && POINTER_TYPE_P (TREE_TYPE (op
)))
3983 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
3986 if (POINTER_TYPE_P (TREE_TYPE (op
))
3987 && !thisarginfo
.linear_step
3989 && thisarginfo
.dt
!= vect_constant_def
3990 && thisarginfo
.dt
!= vect_external_def
3993 && TREE_CODE (op
) == SSA_NAME
)
3994 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3996 arginfo
.quick_push (thisarginfo
);
3999 unsigned HOST_WIDE_INT vf
;
4000 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
4002 if (dump_enabled_p ())
4003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4004 "not considering SIMD clones; not yet supported"
4005 " for variable-width vectors.\n");
4009 unsigned int badness
= 0;
4010 struct cgraph_node
*bestn
= NULL
;
4011 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4012 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4014 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4015 n
= n
->simdclone
->next_clone
)
4017 unsigned int this_badness
= 0;
4018 if (n
->simdclone
->simdlen
> vf
4019 || n
->simdclone
->nargs
!= nargs
)
4021 if (n
->simdclone
->simdlen
< vf
)
4022 this_badness
+= (exact_log2 (vf
)
4023 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4024 if (n
->simdclone
->inbranch
)
4025 this_badness
+= 2048;
4026 int target_badness
= targetm
.simd_clone
.usable (n
);
4027 if (target_badness
< 0)
4029 this_badness
+= target_badness
* 512;
4030 /* FORNOW: Have to add code to add the mask argument. */
4031 if (n
->simdclone
->inbranch
)
4033 for (i
= 0; i
< nargs
; i
++)
4035 switch (n
->simdclone
->args
[i
].arg_type
)
4037 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4038 if (!useless_type_conversion_p
4039 (n
->simdclone
->args
[i
].orig_type
,
4040 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4042 else if (arginfo
[i
].dt
== vect_constant_def
4043 || arginfo
[i
].dt
== vect_external_def
4044 || arginfo
[i
].linear_step
)
4047 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4048 if (arginfo
[i
].dt
!= vect_constant_def
4049 && arginfo
[i
].dt
!= vect_external_def
)
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4054 if (arginfo
[i
].dt
== vect_constant_def
4055 || arginfo
[i
].dt
== vect_external_def
4056 || (arginfo
[i
].linear_step
4057 != n
->simdclone
->args
[i
].linear_step
))
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4063 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4065 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4069 case SIMD_CLONE_ARG_TYPE_MASK
:
4072 if (i
== (size_t) -1)
4074 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4079 if (arginfo
[i
].align
)
4080 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4081 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4083 if (i
== (size_t) -1)
4085 if (bestn
== NULL
|| this_badness
< badness
)
4088 badness
= this_badness
;
4095 for (i
= 0; i
< nargs
; i
++)
4096 if ((arginfo
[i
].dt
== vect_constant_def
4097 || arginfo
[i
].dt
== vect_external_def
)
4098 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4101 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
4103 if (arginfo
[i
].vectype
== NULL
4104 || (simd_clone_subparts (arginfo
[i
].vectype
)
4105 > bestn
->simdclone
->simdlen
))
4109 fndecl
= bestn
->decl
;
4110 nunits
= bestn
->simdclone
->simdlen
;
4111 ncopies
= vf
/ nunits
;
  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
4116 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4117 && gimple_vuse (stmt
))
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
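  /* Illustrative, assumed user code for the safelen check above: a clone of
     a non-const function (one with a gimple_vuse) is only usable in a loop
     such as

	#pragma omp simd safelen(8)
	for (int i = 0; i < n; i++)
	  a[i] = work (a[i]);

     where the user has promised that at least 8 consecutive iterations may
     be executed as one SIMD group despite the call's memory effects.  */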
4124 if (!vec_stmt
) /* transformation not required. */
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4127 for (i
= 0; i
< nargs
; i
++)
4128 if ((bestn
->simdclone
->args
[i
].arg_type
4129 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4130 || (bestn
->simdclone
->args
[i
].arg_type
4131 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4135 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4136 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4137 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4138 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4139 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4140 tree sll
= arginfo
[i
].simd_lane_linear
4141 ? boolean_true_node
: boolean_false_node
;
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4144 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4145 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4146 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4156 scalar_dest
= gimple_call_lhs (stmt
);
4157 vec_dest
= NULL_TREE
;
4162 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4163 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4164 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4167 rtype
= TREE_TYPE (ratype
);
4171 prev_stmt_info
= NULL
;
4172 for (j
= 0; j
< ncopies
; ++j
)
4174 /* Build argument list for the vectorized call. */
4176 vargs
.create (nargs
);
4180 for (i
= 0; i
< nargs
; i
++)
4182 unsigned int k
, l
, m
, o
;
4184 op
= gimple_call_arg (stmt
, i
);
4185 switch (bestn
->simdclone
->args
[i
].arg_type
)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4188 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4189 o
= nunits
/ simd_clone_subparts (atype
);
4190 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4192 if (simd_clone_subparts (atype
)
4193 < simd_clone_subparts (arginfo
[i
].vectype
))
4195 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4196 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4197 / simd_clone_subparts (atype
));
4198 gcc_assert ((k
& (k
- 1)) == 0);
4201 = vect_get_vec_def_for_operand (op
, stmt_info
);
4204 vec_oprnd0
= arginfo
[i
].op
;
4205 if ((m
& (k
- 1)) == 0)
4207 = vect_get_vec_def_for_stmt_copy (vinfo
,
4210 arginfo
[i
].op
= vec_oprnd0
;
4212 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4214 bitsize_int ((m
& (k
- 1)) * prec
));
4216 = gimple_build_assign (make_ssa_name (atype
),
4218 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4219 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4223 k
= (simd_clone_subparts (atype
)
4224 / simd_clone_subparts (arginfo
[i
].vectype
));
4225 gcc_assert ((k
& (k
- 1)) == 0);
4226 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4228 vec_alloc (ctor_elts
, k
);
4231 for (l
= 0; l
< k
; l
++)
4233 if (m
== 0 && l
== 0)
4235 = vect_get_vec_def_for_operand (op
, stmt_info
);
4238 = vect_get_vec_def_for_stmt_copy (vinfo
,
4240 arginfo
[i
].op
= vec_oprnd0
;
4243 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4247 vargs
.safe_push (vec_oprnd0
);
4250 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4252 = gimple_build_assign (make_ssa_name (atype
),
4254 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4256 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4261 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4262 vargs
.safe_push (op
);
4264 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4265 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4270 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4275 edge pe
= loop_preheader_edge (loop
);
4276 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4277 gcc_assert (!new_bb
);
4279 if (arginfo
[i
].simd_lane_linear
)
4281 vargs
.safe_push (arginfo
[i
].op
);
4284 tree phi_res
= copy_ssa_name (op
);
4285 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4286 loop_vinfo
->add_stmt (new_phi
);
4287 add_phi_arg (new_phi
, arginfo
[i
].op
,
4288 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4290 = POINTER_TYPE_P (TREE_TYPE (op
))
4291 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4292 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4293 ? sizetype
: TREE_TYPE (op
);
4295 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4297 tree tcst
= wide_int_to_tree (type
, cst
);
4298 tree phi_arg
= copy_ssa_name (op
);
4300 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4301 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4302 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4303 loop_vinfo
->add_stmt (new_stmt
);
4304 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4306 arginfo
[i
].op
= phi_res
;
4307 vargs
.safe_push (phi_res
);
4312 = POINTER_TYPE_P (TREE_TYPE (op
))
4313 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4314 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4315 ? sizetype
: TREE_TYPE (op
);
4317 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4319 tree tcst
= wide_int_to_tree (type
, cst
);
4320 new_temp
= make_ssa_name (TREE_TYPE (op
));
4322 = gimple_build_assign (new_temp
, code
,
4323 arginfo
[i
].op
, tcst
);
4324 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4325 vargs
.safe_push (new_temp
);
4328 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4329 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4330 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4331 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4332 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4333 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4339 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4342 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4344 new_temp
= create_tmp_var (ratype
);
4345 else if (simd_clone_subparts (vectype
)
4346 == simd_clone_subparts (rtype
))
4347 new_temp
= make_ssa_name (vec_dest
, new_call
);
4349 new_temp
= make_ssa_name (rtype
, new_call
);
4350 gimple_call_set_lhs (new_call
, new_temp
);
4352 stmt_vec_info new_stmt_info
4353 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4357 if (simd_clone_subparts (vectype
) < nunits
)
4360 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4361 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4362 k
= nunits
/ simd_clone_subparts (vectype
);
4363 gcc_assert ((k
& (k
- 1)) == 0);
4364 for (l
= 0; l
< k
; l
++)
4369 t
= build_fold_addr_expr (new_temp
);
4370 t
= build2 (MEM_REF
, vectype
, t
,
4371 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4374 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4375 bitsize_int (prec
), bitsize_int (l
* prec
));
4377 = gimple_build_assign (make_ssa_name (vectype
), t
);
4379 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4381 if (j
== 0 && l
== 0)
4382 STMT_VINFO_VEC_STMT (stmt_info
)
4383 = *vec_stmt
= new_stmt_info
;
4385 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4387 prev_stmt_info
= new_stmt_info
;
4391 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4394 else if (simd_clone_subparts (vectype
) > nunits
)
4396 unsigned int k
= (simd_clone_subparts (vectype
)
4397 / simd_clone_subparts (rtype
));
4398 gcc_assert ((k
& (k
- 1)) == 0);
4399 if ((j
& (k
- 1)) == 0)
4400 vec_alloc (ret_ctor_elts
, k
);
4403 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4404 for (m
= 0; m
< o
; m
++)
4406 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4407 size_int (m
), NULL_TREE
, NULL_TREE
);
4409 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4411 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4413 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4414 gimple_assign_lhs (new_stmt
));
4416 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4419 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4420 if ((j
& (k
- 1)) != k
- 1)
4422 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4424 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4426 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4428 if ((unsigned) j
== k
- 1)
4429 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4431 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4433 prev_stmt_info
= new_stmt_info
;
4438 tree t
= build_fold_addr_expr (new_temp
);
4439 t
= build2 (MEM_REF
, vectype
, t
,
4440 build_int_cst (TREE_TYPE (t
), 0));
4442 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4444 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4445 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4450 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4452 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4454 prev_stmt_info
= new_stmt_info
;
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
4470 type
= TREE_TYPE (scalar_dest
);
4471 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4472 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4475 new_stmt
= gimple_build_nop ();
4476 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4477 unlink_stmt_vdef (stmt
);
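/* Illustrative sketch of the transformation performed by this function (the
   declarations and the clone name are assumed examples, not from this file):

	#pragma omp declare simd
	int foo (int x);

	for (i = 0; i < n; i++)
	  a[i] = foo (b[i]);

   With vectorization factor 4 and a clone such as _ZGVbN4v_foo taking and
   returning V4SI, each group of four scalar calls becomes

	vect_b = <load of b[i:i+3]>;
	vect_r = _ZGVbN4v_foo (vect_b);
	<store of vect_r to a[i:i+3]>;

   with extra BIT_FIELD_REF / CONSTRUCTOR glue emitted above whenever the
   clone's simdlen differs from the number of elements per vector stmt.  */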
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
4502 /* Generate half of the widened result: */
4503 if (code
== CALL_EXPR
)
4505 /* Target specific support */
4506 if (op_type
== binary_op
)
4507 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4509 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4510 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4511 gimple_call_set_lhs (new_stmt
, new_temp
);
4515 /* Generic support */
4516 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4517 if (op_type
!= binary_op
)
4519 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4520 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4521 gimple_assign_set_lhs (new_stmt
, new_temp
);
4523 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
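/* For example (element counts assumed for illustration), widening a V8HI
   operand pair to V8SI requires two result vectors: one built with the "lo"
   code or builtin for elements 0-3 and one with the "hi" variant for
   elements 4-7.  This helper emits exactly one such half per call; the
   caller invokes it twice, once with CODE1/DECL1 and once with
   CODE2/DECL2.  */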
/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
   For the first operand we call vect_get_vec_def_for_operand (with OPRND
   containing scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
4540 vec_info
*vinfo
= stmt_info
->vinfo
;
  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4547 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4549 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4551 vec_oprnds
->quick_push (vec_oprnd
);
4553 /* Get second vector operand. */
4554 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4555 vec_oprnds
->quick_push (vec_oprnd
);
4559 /* For conversion in multiple steps, continue to get operands
4562 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4563 multi_step_cvt
- 1);
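/* Illustrative note (types assumed): for a narrowing such as int -> char via
   short, MULTI_STEP_CVT is 1 and the caller requests vect_pow2 (1) - 1 = 1
   extra level of recursion, so each copy collects 2 + 2 = 4 consecutive V4SI
   defs; the demotion code below then packs those four vectors into a single
   V16QI result.  */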
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
4581 tree vop0
, vop1
, new_tmp
, vec_dest
;
4583 vec_dest
= vec_dsts
.pop ();
4585 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4587 /* Create demotion operation. */
4588 vop0
= (*vec_oprnds
)[i
];
4589 vop1
= (*vec_oprnds
)[i
+ 1];
4590 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4591 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4592 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4593 stmt_vec_info new_stmt_info
4594 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4597 /* Store the resulting vector for next recursive call. */
4598 (*vec_oprnds
)[i
/2] = new_tmp
;
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
4605 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4608 if (!*prev_stmt_info
)
4609 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4611 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4613 *prev_stmt_info
= new_stmt_info
;
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */

      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
4626 vec_oprnds
->truncate ((i
+1)/2);
4627 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4628 stmt_info
, vec_dsts
, gsi
,
4629 slp_node
, VEC_PACK_TRUNC_EXPR
,
4633 vec_dsts
.quick_push (vec_dest
);
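/* Illustrative sketch (vector types assumed): demoting four V4SI operands to
   one V16QI result takes two rounds of packing:

	v8hi_0  = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
	v8hi_1  = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;
	v16qi_0 = VEC_PACK_TRUNC_EXPR <v8hi_0, v8hi_1>;

   The recursion above performs one round per call, halving the number of
   operands each time; the first round uses the CODE passed by the caller and
   later rounds use VEC_PACK_TRUNC_EXPR.  */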
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
4652 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4653 gimple
*new_stmt1
, *new_stmt2
;
4654 vec
<tree
> vec_tmp
= vNULL
;
4656 vec_tmp
.create (vec_oprnds0
->length () * 2);
4657 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4659 if (op_type
== binary_op
)
4660 vop1
= (*vec_oprnds1
)[i
];
4664 /* Generate the two halves of promotion operation. */
4665 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4666 op_type
, vec_dest
, gsi
,
4668 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4669 op_type
, vec_dest
, gsi
,
4671 if (is_gimple_call (new_stmt1
))
4673 new_tmp1
= gimple_call_lhs (new_stmt1
);
4674 new_tmp2
= gimple_call_lhs (new_stmt2
);
4678 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4679 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4682 /* Store the results for the next step. */
4683 vec_tmp
.quick_push (new_tmp1
);
4684 vec_tmp
.quick_push (new_tmp2
);
4687 vec_oprnds0
->release ();
4688 *vec_oprnds0
= vec_tmp
;
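/* For example (types assumed for illustration), a widening multiply of two
   V8HI operands produces two V4SI results per input pair:

	v4si_lo = VEC_WIDEN_MULT_LO_EXPR <v8hi_a, v8hi_b>;
	v4si_hi = VEC_WIDEN_MULT_HI_EXPR <v8hi_a, v8hi_b>;

   Both halves are stored back into VEC_OPRNDS0 so that a further promotion
   step, or the caller, can consume them in order.  */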
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
4704 tree op0
, op1
= NULL_TREE
;
4705 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4706 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4707 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4708 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4709 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4711 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4713 stmt_vec_info prev_stmt_info
;
4714 poly_uint64 nunits_in
;
4715 poly_uint64 nunits_out
;
4716 tree vectype_out
, vectype_in
;
4718 tree lhs_type
, rhs_type
;
4719 enum { NARROW
, NONE
, WIDEN
} modifier
;
4720 vec
<tree
> vec_oprnds0
= vNULL
;
4721 vec
<tree
> vec_oprnds1
= vNULL
;
4723 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4724 vec_info
*vinfo
= stmt_info
->vinfo
;
4725 int multi_step_cvt
= 0;
4726 vec
<tree
> interm_types
= vNULL
;
4727 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4729 unsigned short fltsz
;
4731 /* Is STMT a vectorizable conversion? */
4733 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4736 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4740 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4744 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4747 code
= gimple_assign_rhs_code (stmt
);
4748 if (!CONVERT_EXPR_CODE_P (code
)
4749 && code
!= FIX_TRUNC_EXPR
4750 && code
!= FLOAT_EXPR
4751 && code
!= WIDEN_MULT_EXPR
4752 && code
!= WIDEN_LSHIFT_EXPR
)
4755 op_type
= TREE_CODE_LENGTH (code
);
4757 /* Check types of lhs and rhs. */
4758 scalar_dest
= gimple_assign_lhs (stmt
);
4759 lhs_type
= TREE_TYPE (scalar_dest
);
4760 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4762 op0
= gimple_assign_rhs1 (stmt
);
4763 rhs_type
= TREE_TYPE (op0
);
4765 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4766 && !((INTEGRAL_TYPE_P (lhs_type
)
4767 && INTEGRAL_TYPE_P (rhs_type
))
4768 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4769 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4772 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4773 && ((INTEGRAL_TYPE_P (lhs_type
)
4774 && !type_has_mode_precision_p (lhs_type
))
4775 || (INTEGRAL_TYPE_P (rhs_type
)
4776 && !type_has_mode_precision_p (rhs_type
))))
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4780 "type conversion to/from bit-precision unsupported."
4785 /* Check the operands of the operation. */
4786 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4790 "use not simple.\n");
4793 if (op_type
== binary_op
)
4797 op1
= gimple_assign_rhs2 (stmt
);
4798 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
4801 if (CONSTANT_CLASS_P (op0
))
4802 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4804 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4808 if (dump_enabled_p ())
4809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4810 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
4818 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4820 gcc_assert (vectype_in
);
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4825 "no vectype for scalar type %T\n", rhs_type
);
4830 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4831 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4835 "can't convert between boolean and non "
4836 "boolean vectors %T\n", rhs_type
);
4841 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4842 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4843 if (known_eq (nunits_out
, nunits_in
))
4845 else if (multiple_p (nunits_out
, nunits_in
))
4849 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4858 else if (modifier
== NARROW
)
4859 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4861 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4863 /* Sanity check: make sure that at least one copy of the vectorized stmt
4864 needs to be generated. */
4865 gcc_assert (ncopies
>= 1);
4867 bool found_mode
= false;
4868 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4869 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4870 opt_scalar_mode rhs_mode_iter
;
4872 /* Supportable by target? */
4876 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4878 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4885 "conversion not supported by target.\n");
4889 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4890 vectype_in
, &code1
, &code2
,
4891 &multi_step_cvt
, &interm_types
))
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
4895 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4899 if (code
!= FLOAT_EXPR
4900 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4903 fltsz
= GET_MODE_SIZE (lhs_mode
);
4904 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4906 rhs_mode
= rhs_mode_iter
.require ();
4907 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4911 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4912 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4913 if (cvt_type
== NULL_TREE
)
4916 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4918 if (!supportable_convert_operation (code
, vectype_out
,
4919 cvt_type
, &decl1
, &codecvt1
))
4922 else if (!supportable_widening_operation (code
, stmt_info
,
4923 vectype_out
, cvt_type
,
4924 &codecvt1
, &codecvt2
,
4929 gcc_assert (multi_step_cvt
== 0);
4931 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4932 vectype_in
, &code1
, &code2
,
4933 &multi_step_cvt
, &interm_types
))
4943 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4944 codecvt2
= ERROR_MARK
;
4948 interm_types
.safe_push (cvt_type
);
4949 cvt_type
= NULL_TREE
;
4954 gcc_assert (op_type
== unary_op
);
4955 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4956 &code1
, &multi_step_cvt
,
4960 if (code
!= FIX_TRUNC_EXPR
4961 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4965 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4966 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4967 if (cvt_type
== NULL_TREE
)
4969 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4972 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4973 &code1
, &multi_step_cvt
,
4982 if (!vec_stmt
) /* transformation not required. */
4984 DUMP_VECT_SCOPE ("vectorizable_conversion");
4985 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4987 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4988 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4991 else if (modifier
== NARROW
)
4993 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4994 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4999 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5000 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
5003 interm_types
.release ();
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE
, vect_location
,
5010 "transform conversion. ncopies = %d.\n", ncopies
);
5012 if (op_type
== binary_op
)
5014 if (CONSTANT_CLASS_P (op0
))
5015 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5016 else if (CONSTANT_CLASS_P (op1
))
5017 op1
= fold_convert (TREE_TYPE (op0
), op1
);
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
5025 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5026 vec_dest
= vect_create_destination_var (scalar_dest
,
5027 (cvt_type
&& modifier
== WIDEN
)
5028 ? cvt_type
: vectype_out
);
5029 vec_dsts
.quick_push (vec_dest
);
5033 for (i
= interm_types
.length () - 1;
5034 interm_types
.iterate (i
, &intermediate_type
); i
--)
5036 vec_dest
= vect_create_destination_var (scalar_dest
,
5038 vec_dsts
.quick_push (vec_dest
);
5043 vec_dest
= vect_create_destination_var (scalar_dest
,
5045 ? vectype_out
: cvt_type
);
5049 if (modifier
== WIDEN
)
5051 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5052 if (op_type
== binary_op
)
5053 vec_oprnds1
.create (1);
5055 else if (modifier
== NARROW
)
5056 vec_oprnds0
.create (
5057 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5059 else if (code
== WIDEN_LSHIFT_EXPR
)
5060 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5063 prev_stmt_info
= NULL
;
5067 for (j
= 0; j
< ncopies
; j
++)
5070 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
5073 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5075 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5077 stmt_vec_info new_stmt_info
;
5078 /* Arguments are ready, create the new vector stmt. */
5079 if (code1
== CALL_EXPR
)
5081 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5083 gimple_call_set_lhs (new_stmt
, new_temp
);
5085 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5089 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5091 = gimple_build_assign (vec_dest
, code1
, vop0
);
5092 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5093 gimple_assign_set_lhs (new_stmt
, new_temp
);
5095 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5099 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5102 if (!prev_stmt_info
)
5103 STMT_VINFO_VEC_STMT (stmt_info
)
5104 = *vec_stmt
= new_stmt_info
;
5106 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5107 prev_stmt_info
= new_stmt_info
;
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
5118 for (j
= 0; j
< ncopies
; j
++)
5125 if (code
== WIDEN_LSHIFT_EXPR
)
5130 /* Store vec_oprnd1 for every vector stmt to be created
5131 for SLP_NODE. We check during the analysis that all
5132 the shift arguments are the same. */
5133 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5134 vec_oprnds1
.quick_push (vec_oprnd1
);
5136 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
5137 &vec_oprnds0
, NULL
, slp_node
);
5140 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5141 &vec_oprnds1
, slp_node
);
5145 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5146 vec_oprnds0
.quick_push (vec_oprnd0
);
5147 if (op_type
== binary_op
)
5149 if (code
== WIDEN_LSHIFT_EXPR
)
5153 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5154 vec_oprnds1
.quick_push (vec_oprnd1
);
5160 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5161 vec_oprnds0
.truncate (0);
5162 vec_oprnds0
.quick_push (vec_oprnd0
);
5163 if (op_type
== binary_op
)
5165 if (code
== WIDEN_LSHIFT_EXPR
)
5168 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5170 vec_oprnds1
.truncate (0);
5171 vec_oprnds1
.quick_push (vec_oprnd1
);
5175 /* Arguments are ready. Create the new vector stmts. */
5176 for (i
= multi_step_cvt
; i
>= 0; i
--)
5178 tree this_dest
= vec_dsts
[i
];
5179 enum tree_code c1
= code1
, c2
= code2
;
5180 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5185 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5186 &vec_oprnds1
, stmt_info
,
5188 c1
, c2
, decl1
, decl2
,
5192 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5194 stmt_vec_info new_stmt_info
;
5197 if (codecvt1
== CALL_EXPR
)
5199 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5200 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5201 gimple_call_set_lhs (new_stmt
, new_temp
);
5203 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5208 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5209 new_temp
= make_ssa_name (vec_dest
);
5211 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5213 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5218 new_stmt_info
= vinfo
->lookup_def (vop0
);
5221 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5224 if (!prev_stmt_info
)
5225 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5227 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5228 prev_stmt_info
= new_stmt_info
;
5233 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
5241 for (j
= 0; j
< ncopies
; j
++)
5245 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5249 vec_oprnds0
.truncate (0);
5250 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5251 vect_pow2 (multi_step_cvt
) - 1);
5254 /* Arguments are ready. Create the new vector stmts. */
5256 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5258 if (codecvt1
== CALL_EXPR
)
5260 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5261 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5262 gimple_call_set_lhs (new_stmt
, new_temp
);
5263 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5267 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5268 new_temp
= make_ssa_name (vec_dest
);
5270 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5271 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5274 vec_oprnds0
[i
] = new_temp
;
5277 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5278 stmt_info
, vec_dsts
, gsi
,
5283 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5287 vec_oprnds0
.release ();
5288 vec_oprnds1
.release ();
5289 interm_types
.release ();
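/* Illustrative sketch of a multi-step conversion handled above (types
   assumed): for

	double d = (double) s;		(s of a narrower integer type)

   the input vectors are first widened to an intermediate integer vector type
   recorded in INTERM_TYPES (via the unpack-lo/hi style codes returned by
   supportable_widening_operation) and only then converted with FLOAT_EXPR,
   so one scalar statement expands to several vector statements per copy.  */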
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
5310 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5312 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5316 vec
<tree
> vec_oprnds
= vNULL
;
5318 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5319 vec_info
*vinfo
= stmt_info
->vinfo
;
5320 stmt_vec_info prev_stmt_info
= NULL
;
5321 enum tree_code code
;
5324 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5327 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5331 /* Is vectorizable assignment? */
5332 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5336 scalar_dest
= gimple_assign_lhs (stmt
);
5337 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5340 code
= gimple_assign_rhs_code (stmt
);
5341 if (gimple_assign_single_p (stmt
)
5342 || code
== PAREN_EXPR
5343 || CONVERT_EXPR_CODE_P (code
))
5344 op
= gimple_assign_rhs1 (stmt
);
5348 if (code
== VIEW_CONVERT_EXPR
)
5349 op
= TREE_OPERAND (op
, 0);
5351 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5352 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
5360 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5362 gcc_assert (ncopies
>= 1);
5364 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5366 if (dump_enabled_p ())
5367 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5368 "use not simple.\n");
5372 /* We can handle NOP_EXPR conversions that do not change the number
5373 of elements or the vector size. */
5374 if ((CONVERT_EXPR_CODE_P (code
)
5375 || code
== VIEW_CONVERT_EXPR
)
5377 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5378 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5379 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5382 /* We do not handle bit-precision changes. */
5383 if ((CONVERT_EXPR_CODE_P (code
)
5384 || code
== VIEW_CONVERT_EXPR
)
5385 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5386 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5387 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5388 /* But a conversion that does not change the bit-pattern is ok. */
5389 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5390 > TYPE_PRECISION (TREE_TYPE (op
)))
5391 && TYPE_UNSIGNED (TREE_TYPE (op
)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 booleans.  */
5395 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5396 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5400 "type conversion to/from bit-precision "
5405 if (!vec_stmt
) /* transformation not required. */
5407 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5408 DUMP_VECT_SCOPE ("vectorizable_assignment");
5409 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5414 if (dump_enabled_p ())
5415 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5418 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5421 for (j
= 0; j
< ncopies
; j
++)
5425 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5427 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5429 /* Arguments are ready. create the new vector stmt. */
5430 stmt_vec_info new_stmt_info
= NULL
;
5431 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5433 if (CONVERT_EXPR_CODE_P (code
)
5434 || code
== VIEW_CONVERT_EXPR
)
5435 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5436 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5437 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5438 gimple_assign_set_lhs (new_stmt
, new_temp
);
5440 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5442 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5449 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5451 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5453 prev_stmt_info
= new_stmt_info
;
5456 vec_oprnds
.release ();
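/* For example, a same-width conversion such as

	unsigned int u = (unsigned int) s;	(s of type int)

   changes neither the number of elements nor the vector size, so it is
   vectorized above as a single VIEW_CONVERT_EXPR of the operand vector per
   copy.  */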
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
5468 machine_mode vec_mode
;
5473 vectype
= get_vectype_for_scalar_type (scalar_type
);
5477 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5479 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5481 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5483 || (optab_handler (optab
, TYPE_MODE (vectype
))
5484 == CODE_FOR_nothing
))
5488 vec_mode
= TYPE_MODE (vectype
);
5489 icode
= (int) optab_handler (optab
, vec_mode
);
5490 if (icode
== CODE_FOR_nothing
)
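/* Illustrative, assumed use of the helper above (caller code not from this
   file): pattern recognizers check

	if (!vect_supportable_shift (RSHIFT_EXPR, itype))
	  return NULL;

   before rewriting a scalar operation into a shift, so that only shifts the
   target can vectorize (by a scalar or by a vector amount) are ever
   introduced.  */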
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
5511 tree op0
, op1
= NULL
;
5512 tree vec_oprnd1
= NULL_TREE
;
5514 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5515 enum tree_code code
;
5516 machine_mode vec_mode
;
5520 machine_mode optab_op2_mode
;
5521 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5523 stmt_vec_info prev_stmt_info
;
5524 poly_uint64 nunits_in
;
5525 poly_uint64 nunits_out
;
5530 vec
<tree
> vec_oprnds0
= vNULL
;
5531 vec
<tree
> vec_oprnds1
= vNULL
;
5534 bool scalar_shift_arg
= true;
5535 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5536 vec_info
*vinfo
= stmt_info
->vinfo
;
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5541 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5542 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5546 /* Is STMT a vectorizable binary/unary operation? */
5547 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5551 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5554 code
= gimple_assign_rhs_code (stmt
);
5556 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5557 || code
== RROTATE_EXPR
))
5560 scalar_dest
= gimple_assign_lhs (stmt
);
5561 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5562 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5566 "bit-precision shifts not supported.\n");
5570 op0
= gimple_assign_rhs1 (stmt
);
5571 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5575 "use not simple.\n");
5578 /* If op0 is an external or constant def use a vector type with
5579 the same size as the output vector type. */
5581 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5583 gcc_assert (vectype
);
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5588 "no vectype for scalar type\n");
5592 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5593 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5594 if (maybe_ne (nunits_out
, nunits_in
))
5597 op1
= gimple_assign_rhs2 (stmt
);
5598 stmt_vec_info op1_def_stmt_info
;
5599 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5600 &op1_def_stmt_info
))
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5604 "use not simple.\n");
5608 /* Multiple types in SLP are handled by creating the appropriate number of
5609 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5614 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5616 gcc_assert (ncopies
>= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5621 if ((dt
[1] == vect_internal_def
5622 || dt
[1] == vect_induction_def
5623 || dt
[1] == vect_nested_cycle
)
5625 scalar_shift_arg
= false;
5626 else if (dt
[1] == vect_constant_def
5627 || dt
[1] == vect_external_def
5628 || dt
[1] == vect_internal_def
)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
5635 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5636 stmt_vec_info slpstmt_info
;
5638 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5640 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5641 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5642 scalar_shift_arg
= false;
	  /* For internal SLP defs we have to make sure we see scalar stmts
	     for all vector elements.
	     ??? For different vectors we could resort to a different
	     scalar shift operand but code-generation below simply always
	     takes the first.  */
5650 if (dt
[1] == vect_internal_def
5651 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5653 scalar_shift_arg
= false;
      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
5659 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5660 scalar_shift_arg
= false;
5664 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5666 "operand mode requires invariant argument.\n");
5670 /* Vector shifted by vector. */
5671 if (!scalar_shift_arg
)
5673 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_NOTE
, vect_location
,
5676 "vector/vector shift/rotate found.\n");
5679 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5680 if (op1_vectype
== NULL_TREE
5681 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5685 "unusable type for last operand in"
5686 " vector/vector shift/rotate.\n");
5690 /* See if the machine has a vector shifted by scalar insn and if not
5691 then see if it has a vector shifted by vector insn. */
5694 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5696 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_NOTE
, vect_location
,
5700 "vector/scalar shift/rotate found.\n");
5704 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5706 && (optab_handler (optab
, TYPE_MODE (vectype
))
5707 != CODE_FOR_nothing
))
5709 scalar_shift_arg
= false;
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_NOTE
, vect_location
,
5713 "vector/vector shift/rotate found.\n");
	  /* Unlike the other binary operators, shifts/rotates have
	     the rhs being int, instead of the same type as the lhs,
	     so make sure the scalar is the right type if we are
	     dealing with vectors of long long/long/short/char.  */
5719 if (dt
[1] == vect_constant_def
)
5720 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5721 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5725 && TYPE_MODE (TREE_TYPE (vectype
))
5726 != TYPE_MODE (TREE_TYPE (op1
)))
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5730 "unusable type for last operand in"
5731 " vector/vector shift/rotate.\n");
5734 if (vec_stmt
&& !slp_node
)
5736 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5737 op1
= vect_init_vector (stmt_info
, op1
,
5738 TREE_TYPE (vectype
), NULL
);
5745 /* Supportable by target? */
5748 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5753 vec_mode
= TYPE_MODE (vectype
);
5754 icode
= (int) optab_handler (optab
, vec_mode
);
5755 if (icode
== CODE_FOR_nothing
)
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5759 "op not supported by target.\n");
5760 /* Check only during analysis. */
5761 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5763 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_NOTE
, vect_location
,
5767 "proceeding using word mode.\n");
5770 /* Worthwhile without SIMD support? Check only during analysis. */
5772 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5773 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5775 if (dump_enabled_p ())
5776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5777 "not worthwhile without SIMD support.\n");
5781 if (!vec_stmt
) /* transformation not required. */
5783 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5784 DUMP_VECT_SCOPE ("vectorizable_shift");
5785 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE
, vect_location
,
5793 "transform binary/unary operation.\n");
5796 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5798 prev_stmt_info
= NULL
;
5799 for (j
= 0; j
< ncopies
; j
++)
5804 if (scalar_shift_arg
)
	  /* Vector shl and shr insn patterns can be defined with scalar
	     operand 2 (shift operand).  In this case, use constant or loop
	     invariant op1 directly, without extending it to vector mode
	     first.  */
5810 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5811 if (!VECTOR_MODE_P (optab_op2_mode
))
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_NOTE
, vect_location
,
5815 "operand 1 using scalar mode.\n");
5817 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5818 vec_oprnds1
.quick_push (vec_oprnd1
);
	      /* Store vec_oprnd1 for every vector stmt to be created
		 for SLP_NODE.  We check during the analysis that all
		 the shift arguments are the same.
		 TODO: Allow different constants for different vector
		 stmts generated for an SLP instance.  */
5826 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5827 vec_oprnds1
.quick_push (vec_oprnd1
);
      /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	 (a special case for certain kind of vector shifts); otherwise,
	 operand 1 should be of a vector type (the usual case).  */
5836 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5839 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5843 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5845 /* Arguments are ready. Create the new vector stmt. */
5846 stmt_vec_info new_stmt_info
= NULL
;
5847 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5849 vop1
= vec_oprnds1
[i
];
5850 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5851 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5852 gimple_assign_set_lhs (new_stmt
, new_temp
);
5854 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5856 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5863 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5865 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5866 prev_stmt_info
= new_stmt_info
;
5869 vec_oprnds0
.release ();
5870 vec_oprnds1
.release ();
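/* Illustrative note (assumed example loops): in

	for (i = 0; i < n; i++)
	  a[i] = b[i] << 3;

   the shift amount is invariant, so the vector/scalar optab is preferred and
   the constant 3 is used directly as operand 2; in

	for (i = 0; i < n; i++)
	  a[i] = b[i] << c[i];

   the amount varies per element, so the vector/vector optab must be used and
   the amounts are loaded as a vector operand.  */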
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			stmt_vec_info *vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is STMT a vectorizable binary/unary operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
  if (code == POINTER_DIFF_EXPR)

  /* Support only unary, binary or ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	  vectype = vectype_out;
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);

  gcc_assert (vectype);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n",
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))

  if (op_type == binary_op || op_type == ternary_op)
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &dt[1]))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
  if (op_type == ternary_op)
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &dt[2]))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)

  /* Supportable by target?  */
  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
      optab = optab_for_tree_code (code, vectype, optab_default);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);

  if (!target_support_p)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vect_worthwhile_without_simd_p (vinfo, code))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
                                RELATED_STMT    VEC_STMT
     VS1_0:  vx0 = memref0      VS1_1           -
     VS1_1:  vx1 = memref1      VS1_2           -
     VS1_2:  vx2 = memref2      VS1_3           -
     VS1_3:  vx3 = memref3      -               -
     S1:     x = load           -               VS1_0

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
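
  /* A minimal sketch of how a consumer of the chain described above finds
     the def for a given copy; nth_vector_def is a hypothetical helper
     written purely for illustration (the vectorizer itself uses
     vect_get_vec_def_for_stmt_copy).  */
#if 0
  static tree
  nth_vector_def (stmt_vec_info def_info, unsigned copy)
  {
    /* Copy 0 is recorded in STMT_VINFO_VEC_STMT; copies 1..ncopies-1 are
       reached by following the STMT_VINFO_RELATED_STMT links that the loop
       below records.  */
    stmt_vec_info vs = STMT_VINFO_VEC_STMT (def_info);
    for (unsigned k = 0; k < copy; k++)
      vs = STMT_VINFO_RELATED_STMT (vs);
    return gimple_assign_lhs (vs->stmt);
  }
#endif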
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
	  if (op_type == binary_op)
	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
	  else if (op_type == ternary_op)
	      auto_vec<tree> ops(3);
	      ops.quick_push (op0);
	      ops.quick_push (op1);
	      ops.quick_push (op2);
	      auto_vec<vec<tree> > vec_defs(3);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];
	      vec_oprnds1 = vec_defs[1];
	      vec_oprnds2 = vec_defs[2];
	      vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
				 &vec_oprnds1, NULL);
	      vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
	  vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
	  gassign *new_stmt = gimple_build_assign (vec_dest, code,
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt
		= gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);

	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
/* A helper function to ensure data reference DR_INFO's base alignment.  */

ensure_base_align (dr_vec_info *dr_info)
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)

  if (dr_info->base_misaligned)
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to
	= DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
      dr_info->base_misaligned = false;
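
/* For example (illustrative only, not tied to any particular target): if
   the target's preferred vector alignment for the access is 256 bits and
   the base object is a file-scope

     int a[1024];

   with a smaller default alignment, the code above is what effectively
   turns it into

     int a[1024] __attribute__ ((aligned (32)));

   so that the aligned vector access path can be used.  */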
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
  return reference_alias_ptr_type (DR_REF (first_dr));
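
/* Illustrative example (an assumption, not taken from a testcase): a group
   whose members are stored through differently typed references, e.g.

     struct S { int i; float f; } *p;
     p[k].i = 1;        // alias set of "int"
     p[k].f = 2.0f;     // alias set of "float"

   has no single alias type that covers every member, so the conservative
   ptr_type_node (whose alias set conflicts with everything) is returned
   for the vectorized accesses instead.  */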
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

scan_operand_equal_p (tree ref1, tree ref2)
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
6340 for (int i
= 0; i
< 2; ++i
)
6343 int unsignedp
, reversep
, volatilep
= 0;
6344 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6345 &offset
[i
], &mode
, &unsignedp
,
6346 &reversep
, &volatilep
);
6347 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6349 if (TREE_CODE (base
[i
]) == MEM_REF
6350 && offset
[i
] == NULL_TREE
6351 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6353 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6354 if (is_gimple_assign (def_stmt
)
6355 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6356 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6357 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6359 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6361 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6362 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6367 if (!operand_equal_p (base
[0], base
[1], 0))
6369 if (maybe_ne (bitsize
[0], bitsize
[1]))
6371 if (offset
[0] != offset
[1])
6373 if (!offset
[0] || !offset
[1])
6375 if (!operand_equal_p (offset
[0], offset
[1], 0))
6378 for (int i
= 0; i
< 2; ++i
)
6380 step
[i
] = integer_one_node
;
6381 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6383 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6384 if (is_gimple_assign (def_stmt
)
6385 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6386 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6389 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6390 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6393 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6395 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6396 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6398 tree rhs1
= NULL_TREE
;
6399 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6401 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6402 if (gimple_assign_cast_p (def_stmt
))
6403 rhs1
= gimple_assign_rhs1 (def_stmt
);
6405 else if (CONVERT_EXPR_P (offset
[i
]))
6406 rhs1
= TREE_OPERAND (offset
[i
], 0);
6408 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6409 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6410 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6411 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6414 if (!operand_equal_p (offset
[0], offset
[1], 0)
6415 || !operand_equal_p (step
[0], step
[1], 0))
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)

  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  for (i = 0; i <= units_log2; ++i)
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
	  if (i == units_log2)
	  if (whole_vector_shift_kind == scan_store_kind_perm)
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		  tree masktype = build_same_sized_truth_vector_type (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
	  kind = whole_vector_shift_kind;
      if (use_whole_vector)
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
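
/* The selectors built above implement log2 (nunits) prefix-sum steps plus
   one final broadcast of the last lane (compare the VEC_PERM_EXPR masks in
   the comment inside check_scan_store below).  A standalone sketch that
   prints those selectors for an 8-lane vector; print_scan_selectors is a
   hypothetical helper for illustration only and is not used by GCC.  */
#if 0
#include <stdio.h>

/* Step i keeps the first 2^i lanes from operand 0 (the reduction
   initializer) and takes the remaining lanes from operand 1 shifted up by
   2^i positions; the final step broadcasts the last lane.  */
static void
print_scan_selectors (unsigned n)	/* n must be a power of two.  */
{
  for (unsigned i = 0; (1u << i) <= n; ++i)
    {
      printf ("step %u:", i);
      if ((1u << i) == n)
	for (unsigned j = 0; j < n; ++j)
	  printf (" %u", n - 1);		/* broadcast last lane  */
      else
	{
	  unsigned j = 0, k = 0;
	  for (; j < (1u << i); ++j)
	    printf (" %u", j);			/* lanes from operand 0  */
	  for (; j < n; ++j, ++k)
	    printf (" %u", n + k);		/* shifted accumulator  */
	}
      printf ("\n");
    }
}
#endif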
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

check_scan_store (stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);

  gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
      || loop_vinfo == NULL
      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
      || !integer_zerop (DR_OFFSET (dr_info->dr))
      || !integer_zerop (DR_INIT (dr_info->dr))
      || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
      || !alias_sets_conflict_p (get_alias_set (vectype),
				 get_alias_set (TREE_TYPE (ref_type))))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan store.\n");
  /* We need to pattern match code built by OpenMP lowering and simplified
     by following optimizations into something we can handle.
     #pragma omp simd reduction(inscan,+:r)
       #pragma omp scan inclusive (r)
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       // Actual input phase:
       r.0_5 = D.2042[_20];
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
       // Actual scan phase:
       r.1_8 = D.2042[_20];
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++ish user
     defined reduction, but allow the vectorizer to pattern recognize it
     and turn into the appropriate vectorized scan.

     For exclusive scan, this is slightly different:
     #pragma omp simd reduction(inscan,+:r)
       #pragma omp scan exclusive (r)
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       // Actual input phase:
       r.0_5 = D.2042[_20];
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
       // Actual scan phase:
       r.1_8 = D.2044[_20];  */
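
  /* For reference, a user-level loop of the shape the comment above is
     describing might look like the following; this is an illustrative
     sketch only (array names and bounds are made up, not taken from the
     dumps above).  */
#if 0
  int r = 0;
  #pragma omp simd reduction (inscan, +:r)
  for (int i = 0; i < 1024; i++)
    {
      r += a[i];
      #pragma omp scan inclusive (r)
      b[i] = r;		/* b[i] receives the inclusive prefix sum.  */
    }
#endif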
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
      /* Match the D.2042[_21] = 0; store above.  Just require that
	 it is a constant or external definition store.  */
      if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported OpenMP scan initializer store.\n");

      if (! loop_vinfo->scan_map)
	loop_vinfo->scan_map = new hash_map<tree, tree>;
      tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
      tree &cached = loop_vinfo->scan_map->get_or_insert (var);
	cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
      /* These stores can be vectorized normally.  */
  if (rhs_dt != vect_internal_def)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan combiner pattern.\n");

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (rhs) != SSA_NAME)
*other_store_stmt
= NULL
;
6653 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6654 bool inscan_var_store
6655 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6657 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6659 if (!inscan_var_store
)
6661 use_operand_p use_p
;
6662 imm_use_iterator iter
;
6663 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6665 gimple
*use_stmt
= USE_STMT (use_p
);
6666 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6668 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6669 || !is_gimple_assign (use_stmt
)
6670 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6672 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6674 other_store_stmt
= use_stmt
;
6676 if (other_store_stmt
== NULL
)
6678 rhs
= gimple_assign_lhs (other_store_stmt
);
6679 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6683 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6685 use_operand_p use_p
;
6686 imm_use_iterator iter
;
6687 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6689 gimple
*use_stmt
= USE_STMT (use_p
);
6690 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6692 if (other_store_stmt
)
6694 other_store_stmt
= use_stmt
;
6700 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6701 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6702 || !is_gimple_assign (def_stmt
)
6703 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6706 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6707 /* For pointer addition, we should use the normal plus for the vector
6711 case POINTER_PLUS_EXPR
:
6714 case MULT_HIGHPART_EXPR
:
6719 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6722 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6723 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6724 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6727 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6728 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6729 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6730 || !gimple_assign_load_p (load1_stmt
)
6731 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6732 || !gimple_assign_load_p (load2_stmt
))
6735 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6736 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6737 if (load1_stmt_info
== NULL
6738 || load2_stmt_info
== NULL
6739 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6740 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6741 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6742 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6745 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6747 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6748 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6749 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6751 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6753 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6757 use_operand_p use_p
;
6758 imm_use_iterator iter
;
6759 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6761 gimple
*use_stmt
= USE_STMT (use_p
);
6762 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6764 if (other_store_stmt
)
6766 other_store_stmt
= use_stmt
;
6770 if (other_store_stmt
== NULL
)
6772 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6773 || !gimple_store_p (other_store_stmt
))
6776 stmt_vec_info other_store_stmt_info
6777 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6778 if (other_store_stmt_info
== NULL
6779 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6780 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6783 gimple
*stmt1
= stmt
;
6784 gimple
*stmt2
= other_store_stmt
;
6785 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6786 std::swap (stmt1
, stmt2
);
6787 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6788 gimple_assign_rhs1 (load2_stmt
)))
6790 std::swap (rhs1
, rhs2
);
6791 std::swap (load1_stmt
, load2_stmt
);
6792 std::swap (load1_stmt_info
, load2_stmt_info
);
6794 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6795 gimple_assign_rhs1 (load1_stmt
)))
6798 tree var3
= NULL_TREE
;
6799 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6800 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6801 gimple_assign_rhs1 (load2_stmt
)))
6803 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6805 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6806 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6807 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6809 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6810 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6811 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6812 || lookup_attribute ("omp simd inscan exclusive",
6813 DECL_ATTRIBUTES (var3
)))
6817 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6818 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6819 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6822 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6823 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6824 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6825 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6826 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6827 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6830 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6831 std::swap (var1
, var2
);
6833 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6835 if (!lookup_attribute ("omp simd inscan exclusive",
6836 DECL_ATTRIBUTES (var1
)))
6841 if (loop_vinfo
->scan_map
== NULL
)
6843 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
  /* The IL is as expected, now check if we can actually vectorize it.

     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
     _30 = MEM <vector(8) int> [(int *)&D.2043];
     _31 = MEM <vector(8) int> [(int *)&D.2042];
     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
     _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
     // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
     _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
     // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
     _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
     MEM <vector(8) int> [(int *)&D.2043] = _39;
     MEM <vector(8) int> [(int *)&D.2042] = _38;

     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
     _30 = MEM <vector(8) int> [(int *)&D.2043];
     _31 = MEM <vector(8) int> [(int *)&D.2042];
     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
     //         _31[3]+_31[4], ... _31[5]+.._31[6] };
     _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
     // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
     _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
     // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
     _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
     MEM <vector(8) int> [(int *)&D.2044] = _39;
     MEM <vector(8) int> [(int *)&D.2042] = _51;  */
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)

  int units_log2 = scan_store_can_perm_p (vectype, *init);
  if (units_log2 == -1)
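
/* What the log2 (nunits) permute-and-add steps shown in the comment above
   compute is an in-register inclusive prefix sum.  A scalar model of the
   same idea for one vector of eight int lanes; prefix_sum_in_log2_steps is
   an illustrative sketch only and is not used by the vectorizer.  */
#if 0
static void
prefix_sum_in_log2_steps (int v[8])
{
  for (int shift = 1; shift < 8; shift <<= 1)	/* 3 == log2 (8) steps.  */
    {
      int t[8];
      for (int j = 0; j < 8; j++)
	/* Lane j accumulates lane j - shift; the low lanes are left as-is,
	   which corresponds to shifting in the reduction initializer.  */
	t[j] = j >= shift ? v[j] + v[j - shift] : v[j];
      for (int j = 0; j < 8; j++)
	v[j] = t[j];
    }
}
#endif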
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, int ncopies)
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  vec_info *vinfo = stmt_info->vinfo;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform scan store. ncopies = %d\n", ncopies);

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);

  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6941 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6943 use_operand_p use_p
;
6944 imm_use_iterator iter
;
6945 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6947 gimple
*use_stmt
= USE_STMT (use_p
);
6948 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6950 rhs
= gimple_assign_lhs (use_stmt
);
6955 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6956 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6957 if (code
== POINTER_PLUS_EXPR
)
6959 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
6960 && commutative_tree_code (code
));
6961 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6962 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6963 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
6964 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6965 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6966 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6967 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6968 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6969 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6970 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6971 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6973 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6975 std::swap (rhs1
, rhs2
);
6976 std::swap (var1
, var2
);
6977 std::swap (load1_dr_info
, load2_dr_info
);
6980 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6983 unsigned HOST_WIDE_INT nunits
;
6984 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6986 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
6987 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
6988 gcc_assert (units_log2
> 0);
6989 auto_vec
<tree
, 16> perms
;
6990 perms
.quick_grow (units_log2
+ 1);
6991 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
6992 for (int i
= 0; i
<= units_log2
; ++i
)
6994 unsigned HOST_WIDE_INT j
, k
;
6995 vec_perm_builder
sel (nunits
, nunits
, 1);
6996 sel
.quick_grow (nunits
);
6997 if (i
== units_log2
)
6998 for (j
= 0; j
< nunits
; ++j
)
6999 sel
[j
] = nunits
- 1;
7002 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7004 for (k
= 0; j
< nunits
; ++j
, ++k
)
7005 sel
[j
] = nunits
+ k
;
7007 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7008 if (!use_whole_vector
.is_empty ()
7009 && use_whole_vector
[i
] != scan_store_kind_perm
)
7011 if (zero_vec
== NULL_TREE
)
7012 zero_vec
= build_zero_cst (vectype
);
7013 if (masktype
== NULL_TREE
7014 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7015 masktype
= build_same_sized_truth_vector_type (vectype
);
7016 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7019 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7022 stmt_vec_info prev_stmt_info
= NULL
;
7023 tree vec_oprnd1
= NULL_TREE
;
7024 tree vec_oprnd2
= NULL_TREE
;
7025 tree vec_oprnd3
= NULL_TREE
;
7026 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7027 tree dataref_offset
= build_int_cst (ref_type
, 0);
7028 tree bump
= vect_get_data_ptr_increment (dr_info
, vectype
, VMAT_CONTIGUOUS
);
7029 tree ldataref_ptr
= NULL_TREE
;
7030 tree orig
= NULL_TREE
;
7031 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7032 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7033 for (int j
= 0; j
< ncopies
; j
++)
7035 stmt_vec_info new_stmt_info
;
7038 vec_oprnd1
= vect_get_vec_def_for_operand (*init
, stmt_info
);
7039 if (ldataref_ptr
== NULL
)
7040 vec_oprnd2
= vect_get_vec_def_for_operand (rhs1
, stmt_info
);
7041 vec_oprnd3
= vect_get_vec_def_for_operand (rhs2
, stmt_info
);
7046 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7047 if (ldataref_ptr
== NULL
)
7048 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7049 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7050 if (!inscan_var_store
)
7051 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7056 vec_oprnd2
= make_ssa_name (vectype
);
7057 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7058 unshare_expr (ldataref_ptr
),
7060 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7061 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7062 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7063 if (prev_stmt_info
== NULL
)
7064 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7066 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7067 prev_stmt_info
= new_stmt_info
;
7070 tree v
= vec_oprnd2
;
7071 for (int i
= 0; i
< units_log2
; ++i
)
7073 tree new_temp
= make_ssa_name (vectype
);
7074 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7076 && (use_whole_vector
[i
]
7077 != scan_store_kind_perm
))
7078 ? zero_vec
: vec_oprnd1
, v
,
7080 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7081 if (prev_stmt_info
== NULL
)
7082 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7084 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7085 prev_stmt_info
= new_stmt_info
;
7087 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7089 /* Whole vector shift shifted in zero bits, but if *init
7090 is not initializer_zerop, we need to replace those elements
7091 with elements from vec_oprnd1. */
7092 tree_vector_builder
vb (masktype
, nunits
, 1);
7093 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7094 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7095 ? boolean_false_node
: boolean_true_node
);
7097 tree new_temp2
= make_ssa_name (vectype
);
7098 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7099 new_temp
, vec_oprnd1
);
7100 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7101 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7102 prev_stmt_info
= new_stmt_info
;
7103 new_temp
= new_temp2
;
7106 /* For exclusive scan, perform the perms[i] permutation once
7109 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7117 tree new_temp2
= make_ssa_name (vectype
);
7118 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7119 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7120 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7121 prev_stmt_info
= new_stmt_info
;
7126 tree new_temp
= make_ssa_name (vectype
);
7127 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7128 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7129 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7130 prev_stmt_info
= new_stmt_info
;
7132 tree last_perm_arg
= new_temp
;
7133 /* For exclusive scan, new_temp computed above is the exclusive scan
7134 prefix sum. Turn it into inclusive prefix sum for the broadcast
7135 of the last element into orig. */
7136 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7138 last_perm_arg
= make_ssa_name (vectype
);
7139 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7140 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7141 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7142 prev_stmt_info
= new_stmt_info
;
7145 orig
= make_ssa_name (vectype
);
7146 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7147 last_perm_arg
, perms
[units_log2
]);
7148 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7149 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7150 prev_stmt_info
= new_stmt_info
;
7152 if (!inscan_var_store
)
7154 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7155 unshare_expr (dataref_ptr
),
7157 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7158 g
= gimple_build_assign (data_ref
, new_temp
);
7159 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7160 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7161 prev_stmt_info
= new_stmt_info
;
7165 if (inscan_var_store
)
7166 for (int j
= 0; j
< ncopies
; j
++)
7169 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7171 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7172 unshare_expr (dataref_ptr
),
7174 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7175 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7176 stmt_vec_info new_stmt_info
7177 = vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7178 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7179 prev_stmt_info
= new_stmt_info
;
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
  tree vec_oprnd = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop = NULL;
  machine_mode vec_mode;
  enum dr_alignment_support alignment_support_scheme;
  enum vect_def_type rhs_dt = vect_unknown_def_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  stmt_vec_info first_stmt_info;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gather_scatter_info gs_info;
  vec_load_store_type vls_type;
7232 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7235 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7239 /* Is vectorizable store? */
7241 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7242 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7244 tree scalar_dest
= gimple_assign_lhs (assign
);
7245 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7246 && is_pattern_stmt_p (stmt_info
))
7247 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7248 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7249 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7250 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7251 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7252 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7253 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7254 && TREE_CODE (scalar_dest
) != MEM_REF
)
7259 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7260 if (!call
|| !gimple_call_internal_p (call
))
7263 internal_fn ifn
= gimple_call_internal_fn (call
);
7264 if (!internal_store_fn_p (ifn
))
7267 if (slp_node
!= NULL
)
7269 if (dump_enabled_p ())
7270 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7271 "SLP of masked stores not supported.\n");
7275 int mask_index
= internal_fn_mask_index (ifn
);
7276 if (mask_index
>= 0)
7278 mask
= gimple_call_arg (call
, mask_index
);
7279 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7285 op
= vect_get_store_rhs (stmt_info
);
7287 /* Cannot have hybrid store SLP -- that would mean storing to the
7288 same location twice. */
7289 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7291 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7292 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7296 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7297 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
7321 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7324 elem_type
= TREE_TYPE (vectype
);
7325 vec_mode
= TYPE_MODE (vectype
);
7327 if (!STMT_VINFO_DATA_REF (stmt_info
))
7330 vect_memory_access_type memory_access_type
;
7331 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
7332 &memory_access_type
, &gs_info
))
7337 if (memory_access_type
== VMAT_CONTIGUOUS
)
7339 if (!VECTOR_MODE_P (vec_mode
)
7340 || !can_vec_mask_load_store_p (vec_mode
,
7341 TYPE_MODE (mask_vectype
), false))
7344 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7345 && (memory_access_type
!= VMAT_GATHER_SCATTER
7346 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7348 if (dump_enabled_p ())
7349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7350 "unsupported access type for masked store.\n");
7356 /* FORNOW. In some cases can vectorize even if data-type not supported
7357 (e.g. - array initialization with 0). */
7358 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7362 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7363 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7364 && memory_access_type
!= VMAT_GATHER_SCATTER
7365 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7368 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7369 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7370 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7374 first_stmt_info
= stmt_info
;
7375 first_dr_info
= dr_info
;
7376 group_size
= vec_num
= 1;
7379 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7381 if (!check_scan_store (stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7382 memory_access_type
))
7386 if (!vec_stmt
) /* transformation not required. */
7388 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7391 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7392 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7393 memory_access_type
, &gs_info
);
7395 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7396 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
7397 vls_type
, slp_node
, cost_vec
);
7400 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7404 ensure_base_align (dr_info
);
7406 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7408 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7409 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7410 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7411 tree ptr
, var
, scale
, vec_mask
;
7412 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7413 tree mask_halfvectype
= mask_vectype
;
7414 edge pe
= loop_preheader_edge (loop
);
7417 enum { NARROW
, NONE
, WIDEN
} modifier
;
7418 poly_uint64 scatter_off_nunits
7419 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7421 if (known_eq (nunits
, scatter_off_nunits
))
7423 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7427 /* Currently gathers and scatters are only supported for
7428 fixed-length vectors. */
7429 unsigned int count
= scatter_off_nunits
.to_constant ();
7430 vec_perm_builder
sel (count
, count
, 1);
7431 for (i
= 0; i
< (unsigned int) count
; ++i
)
7432 sel
.quick_push (i
| (count
/ 2));
7434 vec_perm_indices
indices (sel
, 1, count
);
7435 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7437 gcc_assert (perm_mask
!= NULL_TREE
);
7439 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7443 /* Currently gathers and scatters are only supported for
7444 fixed-length vectors. */
7445 unsigned int count
= nunits
.to_constant ();
7446 vec_perm_builder
sel (count
, count
, 1);
7447 for (i
= 0; i
< (unsigned int) count
; ++i
)
7448 sel
.quick_push (i
| (count
/ 2));
7450 vec_perm_indices
indices (sel
, 2, count
);
7451 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7452 gcc_assert (perm_mask
!= NULL_TREE
);
7457 = build_same_sized_truth_vector_type (gs_info
.offset_vectype
);
7462 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7463 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7464 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7465 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7466 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7467 scaletype
= TREE_VALUE (arglist
);
7469 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7470 && TREE_CODE (rettype
) == VOID_TYPE
);
7472 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7473 if (!is_gimple_min_invariant (ptr
))
7475 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7476 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7477 gcc_assert (!new_bb
);
7480 if (mask
== NULL_TREE
)
7482 mask_arg
= build_int_cst (masktype
, -1);
7483 mask_arg
= vect_init_vector (stmt_info
, mask_arg
, masktype
, NULL
);
7486 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7488 prev_stmt_info
= NULL
;
7489 for (j
= 0; j
< ncopies
; ++j
)
7493 src
= vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt_info
);
7494 op
= vec_oprnd0
= vect_get_vec_def_for_operand (gs_info
.offset
,
7497 mask_op
= vec_mask
= vect_get_vec_def_for_operand (mask
,
7500 else if (modifier
!= NONE
&& (j
& 1))
7502 if (modifier
== WIDEN
)
7505 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7507 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
7511 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7514 else if (modifier
== NARROW
)
7516 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
7518 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7526 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7528 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7531 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7535 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7537 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7538 TYPE_VECTOR_SUBPARTS (srctype
)));
7539 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7540 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7542 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7543 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7547 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7549 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7550 TYPE_VECTOR_SUBPARTS (idxtype
)));
7551 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7552 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7554 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7555 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7563 if (modifier
== NARROW
)
7565 var
= vect_get_new_ssa_name (mask_halfvectype
,
7568 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7569 : VEC_UNPACK_LO_EXPR
,
7571 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7574 tree optype
= TREE_TYPE (mask_arg
);
7575 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7578 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7579 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7580 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7582 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7583 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7585 if (!useless_type_conversion_p (masktype
, utype
))
7587 gcc_assert (TYPE_PRECISION (utype
)
7588 <= TYPE_PRECISION (masktype
));
7589 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7590 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7591 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7597 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7598 stmt_vec_info new_stmt_info
7599 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7601 if (prev_stmt_info
== NULL
)
7602 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7604 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7605 prev_stmt_info
= new_stmt_info
;
7609 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7610 return vectorizable_scan_store (stmt_info
, gsi
, vec_stmt
, ncopies
);
7612 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7613 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
  gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));

      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
      if (DR_GROUP_STORE_COUNT (first_stmt_info)
	  < DR_GROUP_SIZE (first_stmt_info)
7632 grouped_store
= false;
7633 /* VEC_NUM is the number of vect stmts to be created for this
7635 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7636 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7637 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7638 == first_stmt_info
);
7639 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7640 op
= vect_get_store_rhs (first_stmt_info
);
7643 /* VEC_NUM is the number of vect stmts to be created for this
7645 vec_num
= group_size
;
7647 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7650 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform store. ncopies = %d\n", ncopies);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
      gimple_stmt_iterator incr_gsi;
      tree stride_base, stride_step, alias_off;

      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));

      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr_info->dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));

      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     array[j + stride] = tmp2;  */
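
      /* A concrete (made-up) instance of the transformation sketched in the
	 comment above, for stride 3 and a 4-lane vector; illustration only,
	 this is not code the vectorizer emits literally.  */
#if 0
      /* Scalar loop:  */
      for (i = 0; i < n; i += 3)
	array[i] = f (i);

      /* Vectorized form (schematically): each 4-lane vector of results is
	 scattered with four scalar stores, and the store index advances by
	 VF*stride = 4*3 elements per vector iteration:  */
      for (j = 0; j < n; j += 4 * 3)
	{
	  vectemp = ...;	/* the 4 results, for i = j, j+3, j+6, j+9  */
	  array[j]     = vectemp[0];
	  array[j + 3] = vectemp[1];
	  array[j + 6] = vectemp[2];
	  array[j + 9] = vectemp[3];
	}
#endif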
7700 unsigned nstores
= const_nunits
;
7702 tree ltype
= elem_type
;
7703 tree lvectype
= vectype
;
7706 if (group_size
< const_nunits
7707 && const_nunits
% group_size
== 0)
7709 nstores
= const_nunits
/ group_size
;
7711 ltype
= build_vector_type (elem_type
, group_size
);
7714 /* First check if vec_extract optab doesn't support extraction
7715 of vector elts directly. */
7716 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7718 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
7719 || !VECTOR_MODE_P (vmode
)
7720 || !targetm
.vector_mode_supported_p (vmode
)
7721 || (convert_optab_handler (vec_extract_optab
,
7722 TYPE_MODE (vectype
), vmode
)
7723 == CODE_FOR_nothing
))
7725 /* Try to avoid emitting an extract of vector elements
7726 by performing the extracts using an integer type of the
7727 same size, extracting from a vector of those and then
7728 re-interpreting it as the original vector type if
7731 = group_size
* GET_MODE_BITSIZE (elmode
);
7732 unsigned int lnunits
= const_nunits
/ group_size
;
7733 /* If we can't construct such a vector fall back to
7734 element extracts from the original vector type and
7735 element size stores. */
7736 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7737 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7738 && VECTOR_MODE_P (vmode
)
7739 && targetm
.vector_mode_supported_p (vmode
)
7740 && (convert_optab_handler (vec_extract_optab
,
7742 != CODE_FOR_nothing
))
7746 ltype
= build_nonstandard_integer_type (lsize
, 1);
7747 lvectype
= build_vector_type (ltype
, nstores
);
7749 /* Else fall back to vector extraction anyway.
7750 Fewer stores are more important than avoiding spilling
7751 of the vector we extract from. Compared to the
7752 construction case in vectorizable_load no store-forwarding
7753 issue exists here for reasonable archs. */
7756 else if (group_size
>= const_nunits
7757 && group_size
% const_nunits
== 0)
7760 lnel
= const_nunits
;
7764 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7765 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7768 ivstep
= stride_step
;
7769 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7770 build_int_cst (TREE_TYPE (ivstep
), vf
));
7772 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7774 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7775 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7776 create_iv (stride_base
, ivstep
, NULL
,
7777 loop
, &incr_gsi
, insert_after
,
7779 incr
= gsi_stmt (incr_gsi
);
7780 loop_vinfo
->add_stmt (incr
);
7782 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7784 prev_stmt_info
= NULL
;
7785 alias_off
= build_int_cst (ref_type
, 0);
7786 stmt_vec_info next_stmt_info
= first_stmt_info
;
7787 for (g
= 0; g
< group_size
; g
++)
7789 running_off
= offvar
;
7792 tree size
= TYPE_SIZE_UNIT (ltype
);
7793 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7795 tree newoff
= copy_ssa_name (running_off
, NULL
);
7796 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7798 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7799 running_off
= newoff
;
7801 unsigned int group_el
= 0;
7802 unsigned HOST_WIDE_INT
7803 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7804 for (j
= 0; j
< ncopies
; j
++)
7806 /* We've set op and dt above, from vect_get_store_rhs,
7807 and first_stmt_info == stmt_info. */
7812 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
7813 &vec_oprnds
, NULL
, slp_node
);
7814 vec_oprnd
= vec_oprnds
[0];
7818 op
= vect_get_store_rhs (next_stmt_info
);
7819 vec_oprnd
= vect_get_vec_def_for_operand
7820 (op
, next_stmt_info
);
7826 vec_oprnd
= vec_oprnds
[j
];
7828 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
7831 /* Pun the vector to extract from if necessary. */
7832 if (lvectype
!= vectype
)
7834 tree tem
= make_ssa_name (lvectype
);
7836 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7837 lvectype
, vec_oprnd
));
7838 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
7841 for (i
= 0; i
< nstores
; i
++)
7843 tree newref
, newoff
;
7844 gimple
*incr
, *assign
;
7845 tree size
= TYPE_SIZE (ltype
);
7846 /* Extract the i'th component. */
7847 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7848 bitsize_int (i
), size
);
7849 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7852 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7856 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7858 newref
= build2 (MEM_REF
, ltype
,
7859 running_off
, this_off
);
7860 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7862 /* And store it to *running_off. */
7863 assign
= gimple_build_assign (newref
, elem
);
7864 stmt_vec_info assign_info
7865 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
7869 || group_el
== group_size
)
7871 newoff
= copy_ssa_name (running_off
, NULL
);
7872 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7873 running_off
, stride_step
);
7874 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7876 running_off
= newoff
;
7879 if (g
== group_size
- 1
7882 if (j
== 0 && i
== 0)
7883 STMT_VINFO_VEC_STMT (stmt_info
)
7884 = *vec_stmt
= assign_info
;
7886 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
7887 prev_stmt_info
= assign_info
;
7891 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7896 vec_oprnds
.release ();
7900 auto_vec
<tree
> dr_chain (group_size
);
7901 oprnds
.create (group_size
);
7903 alignment_support_scheme
7904 = vect_supportable_dr_alignment (first_dr_info
, false);
7905 gcc_assert (alignment_support_scheme
);
7906 vec_loop_masks
*loop_masks
7907 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7908 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7910 /* Targets with store-lane instructions must not require explicit
7911 realignment. vect_supportable_dr_alignment always returns either
7912 dr_aligned or dr_unaligned_supported for masked operations. */
7913 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7916 || alignment_support_scheme
== dr_aligned
7917 || alignment_support_scheme
== dr_unaligned_supported
);
7919 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7920 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7921 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7924 tree vec_offset
= NULL_TREE
;
7925 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7927 aggr_type
= NULL_TREE
;
7930 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7932 aggr_type
= elem_type
;
7933 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7934 &bump
, &vec_offset
);
7938 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7939 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7941 aggr_type
= vectype
;
7942 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
7943 memory_access_type
);
7947 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
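  /* A hypothetical source-level loop that leads to such an interleaved
     store with group size two (illustrative only, not from the sources):

        for (i = 0; i < n; i++)
          {
            out[2*i]     = a[i];
            out[2*i + 1] = b[i];
          }

     One vector of a[] and one vector of b[] are interleaved with the
     VEC_PERM_EXPRs shown above and then written back with two contiguous
     vector stores per vector iteration.  */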
7988 prev_stmt_info
= NULL
;
7989 tree vec_mask
= NULL_TREE
;
7990 for (j
= 0; j
< ncopies
; j
++)
7992 stmt_vec_info new_stmt_info
;
7997 /* Get vectorized arguments for SLP_NODE. */
7998 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8001 vec_oprnd
= vec_oprnds
[0];
8005 /* For interleaved stores we collect vectorized defs for all the
8006 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8007 used as an input to vect_permute_store_chain(), and OPRNDS as
8008 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8010 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8011 OPRNDS are of size 1. */
8012 stmt_vec_info next_stmt_info
= first_stmt_info
;
8013 for (i
= 0; i
< group_size
; i
++)
8015 /* Since gaps are not supported for interleaved stores,
8016 DR_GROUP_SIZE is the exact number of stmts in the chain.
8017 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8018 that there is no interleaving, DR_GROUP_SIZE is 1,
8019 and only one iteration of the loop will be executed. */
8020 op
= vect_get_store_rhs (next_stmt_info
);
8021 vec_oprnd
= vect_get_vec_def_for_operand
8022 (op
, next_stmt_info
);
8023 dr_chain
.quick_push (vec_oprnd
);
8024 oprnds
.quick_push (vec_oprnd
);
8025 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8028 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
	  /* We should have caught mismatched types earlier.  */
8033 gcc_assert (useless_type_conversion_p (vectype
,
8034 TREE_TYPE (vec_oprnd
)));
8035 bool simd_lane_access_p
8036 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8037 if (simd_lane_access_p
8039 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8040 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8041 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
8042 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8043 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8044 get_alias_set (TREE_TYPE (ref_type
))))
8046 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8047 dataref_offset
= build_int_cst (ref_type
, 0);
8049 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8050 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8051 &dataref_ptr
, &vec_offset
);
8054 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
8055 simd_lane_access_p
? loop
: NULL
,
8056 offset
, &dummy
, gsi
, &ptr_incr
,
8057 simd_lane_access_p
, NULL_TREE
, bump
);
8061 /* For interleaved stores we created vectorized defs for all the
8062 defs stored in OPRNDS in the previous iteration (previous copy).
8063 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8064 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8066 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8067 OPRNDS are of size 1. */
8068 for (i
= 0; i
< group_size
; i
++)
8071 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8072 dr_chain
[i
] = vec_oprnd
;
8073 oprnds
[i
] = vec_oprnd
;
8076 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8079 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8080 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8081 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8083 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8087 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8091 /* Get an array into which we can store the individual vectors. */
8092 vec_array
= create_vector_array (vectype
, vec_num
);
8094 /* Invalidate the current contents of VEC_ARRAY. This should
8095 become an RTL clobber too, which prevents the vector registers
8096 from being upward-exposed. */
8097 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8099 /* Store the individual vectors into the array. */
8100 for (i
= 0; i
< vec_num
; i
++)
8102 vec_oprnd
= dr_chain
[i
];
8103 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
8106 tree final_mask
= NULL
;
8108 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8111 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8118 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8120 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8121 tree alias_ptr
= build_int_cst (ref_type
, align
);
8122 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8123 dataref_ptr
, alias_ptr
,
8124 final_mask
, vec_array
);
8129 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8130 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8131 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8133 gimple_call_set_lhs (call
, data_ref
);
8135 gimple_call_set_nothrow (call
, true);
8136 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8138 /* Record that VEC_ARRAY is now dead. */
8139 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8143 new_stmt_info
= NULL
;
8147 result_chain
.create (group_size
);
8149 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
8153 stmt_vec_info next_stmt_info
= first_stmt_info
;
8154 for (i
= 0; i
< vec_num
; i
++)
8157 unsigned HOST_WIDE_INT align
;
8159 tree final_mask
= NULL_TREE
;
8161 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8163 vectype
, vec_num
* j
+ i
);
8165 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8168 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8170 tree scale
= size_int (gs_info
.scale
);
8173 call
= gimple_build_call_internal
8174 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8175 scale
, vec_oprnd
, final_mask
);
8177 call
= gimple_build_call_internal
8178 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8180 gimple_call_set_nothrow (call
, true);
8182 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8187 /* Bump the vector pointer. */
8188 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8192 vec_oprnd
= vec_oprnds
[i
];
8193 else if (grouped_store
)
8194 /* For grouped stores vectorized defs are interleaved in
8195 vect_permute_store_chain(). */
8196 vec_oprnd
= result_chain
[i
];
8198 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8199 if (aligned_access_p (first_dr_info
))
8201 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8203 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
8207 misalign
= DR_MISALIGNMENT (first_dr_info
);
8208 if (dataref_offset
== NULL_TREE
8209 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8210 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8213 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8215 tree perm_mask
= perm_mask_for_reverse (vectype
);
8216 tree perm_dest
= vect_create_destination_var
8217 (vect_get_store_rhs (stmt_info
), vectype
);
8218 tree new_temp
= make_ssa_name (perm_dest
);
8220 /* Generate the permute statement. */
8222 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8223 vec_oprnd
, perm_mask
);
8224 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8226 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8227 vec_oprnd
= new_temp
;
8230 /* Arguments are ready. Create the new vector stmt. */
8233 align
= least_bit_hwi (misalign
| align
);
8234 tree ptr
= build_int_cst (ref_type
, align
);
8236 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8238 final_mask
, vec_oprnd
);
8239 gimple_call_set_nothrow (call
, true);
8241 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8245 data_ref
= fold_build2 (MEM_REF
, vectype
,
8249 : build_int_cst (ref_type
, 0));
8250 if (aligned_access_p (first_dr_info
))
8252 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8253 TREE_TYPE (data_ref
)
8254 = build_aligned_type (TREE_TYPE (data_ref
),
8255 align
* BITS_PER_UNIT
);
8257 TREE_TYPE (data_ref
)
8258 = build_aligned_type (TREE_TYPE (data_ref
),
8259 TYPE_ALIGN (elem_type
));
8260 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8262 = gimple_build_assign (data_ref
, vec_oprnd
);
8264 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8270 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8271 if (!next_stmt_info
)
8278 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8280 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8281 prev_stmt_info
= new_stmt_info
;
8286 result_chain
.release ();
8287 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
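/* Illustrative use (hypothetical variables): to reverse a four-element
   vector X a caller builds the selector { 3, 2, 1, 0 }, roughly

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   and then emits VEC_PERM_EXPR <x, x, mask>, which is roughly what
   perm_mask_for_reverse does for VMAT_CONTIGUOUS_REVERSE accesses.  */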
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
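/* For example (illustrative only): in

     for (i = 0; i < n; i++)
       x[i] = *p + 1;

   the load *p is invariant in the loop.  When hoist_defs_of_uses succeeds
   (and dependence analysis allows it), the VMAT_INVARIANT path in
   vectorizable_load below moves the scalar load to the loop preheader and
   materializes the vector operand from it with vect_init_vector.  */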
/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
8410 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8411 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8412 slp_instance slp_node_instance
,
8413 stmt_vector_for_cost
*cost_vec
)
8416 tree vec_dest
= NULL
;
8417 tree data_ref
= NULL
;
8418 stmt_vec_info prev_stmt_info
;
8419 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8420 class loop
*loop
= NULL
;
8421 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8422 bool nested_in_vect_loop
= false;
8427 enum dr_alignment_support alignment_support_scheme
;
8428 tree dataref_ptr
= NULL_TREE
;
8429 tree dataref_offset
= NULL_TREE
;
8430 gimple
*ptr_incr
= NULL
;
8433 unsigned int group_size
;
8434 poly_uint64 group_gap_adj
;
8435 tree msq
= NULL_TREE
, lsq
;
8436 tree offset
= NULL_TREE
;
8437 tree byte_offset
= NULL_TREE
;
8438 tree realignment_token
= NULL_TREE
;
8440 vec
<tree
> dr_chain
= vNULL
;
8441 bool grouped_load
= false;
8442 stmt_vec_info first_stmt_info
;
8443 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8444 bool compute_in_loop
= false;
8445 class loop
*at_loop
;
8447 bool slp
= (slp_node
!= NULL
);
8448 bool slp_perm
= false;
8449 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8452 gather_scatter_info gs_info
;
8453 vec_info
*vinfo
= stmt_info
->vinfo
;
8455 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8457 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8460 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8464 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8465 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8467 scalar_dest
= gimple_assign_lhs (assign
);
8468 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8471 tree_code code
= gimple_assign_rhs_code (assign
);
8472 if (code
!= ARRAY_REF
8473 && code
!= BIT_FIELD_REF
8474 && code
!= INDIRECT_REF
8475 && code
!= COMPONENT_REF
8476 && code
!= IMAGPART_EXPR
8477 && code
!= REALPART_EXPR
8479 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8484 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8485 if (!call
|| !gimple_call_internal_p (call
))
8488 internal_fn ifn
= gimple_call_internal_fn (call
);
8489 if (!internal_load_fn_p (ifn
))
8492 scalar_dest
= gimple_call_lhs (call
);
8496 int mask_index
= internal_fn_mask_index (ifn
);
8497 if (mask_index
>= 0)
8499 mask
= gimple_call_arg (call
, mask_index
);
8500 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
8506 if (!STMT_VINFO_DATA_REF (stmt_info
))
8509 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8510 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8514 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8515 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8516 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8521 /* Multiple types in SLP are handled by creating the appropriate number of
8522 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8527 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8529 gcc_assert (ncopies
>= 1);
8531 /* FORNOW. This restriction should be relaxed. */
8532 if (nested_in_vect_loop
&& ncopies
> 1)
8534 if (dump_enabled_p ())
8535 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8536 "multiple types in nested loop.\n");
8540 /* Invalidate assumptions made by dependence analysis when vectorization
8541 on the unrolled body effectively re-orders stmts. */
8543 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8544 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8545 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8547 if (dump_enabled_p ())
8548 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8549 "cannot perform implicit CSE when unrolling "
8550 "with negative dependence distance\n");
8554 elem_type
= TREE_TYPE (vectype
);
8555 mode
= TYPE_MODE (vectype
);
8557 /* FORNOW. In some cases can vectorize even if data-type not supported
8558 (e.g. - data copies). */
8559 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8561 if (dump_enabled_p ())
8562 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8563 "Aligned load, but unsupported type.\n");
8567 /* Check if the load is a part of an interleaving chain. */
8568 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8570 grouped_load
= true;
8572 gcc_assert (!nested_in_vect_loop
);
8573 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8575 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8576 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8578 /* Refuse non-SLP vectorization of SLP-only groups. */
8579 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8583 "cannot vectorize load in non-SLP mode.\n");
8587 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8590 /* Invalidate assumptions made by dependence analysis when vectorization
8591 on the unrolled body effectively re-orders stmts. */
8592 if (!PURE_SLP_STMT (stmt_info
)
8593 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8594 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8595 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8597 if (dump_enabled_p ())
8598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8599 "cannot perform implicit CSE when performing "
8600 "group loads with negative dependence distance\n");
8607 vect_memory_access_type memory_access_type
;
8608 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
8609 &memory_access_type
, &gs_info
))
8614 if (memory_access_type
== VMAT_CONTIGUOUS
)
8616 machine_mode vec_mode
= TYPE_MODE (vectype
);
8617 if (!VECTOR_MODE_P (vec_mode
)
8618 || !can_vec_mask_load_store_p (vec_mode
,
8619 TYPE_MODE (mask_vectype
), true))
8622 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8623 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8625 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8627 "unsupported access type for masked load.\n");
8632 if (!vec_stmt
) /* transformation not required. */
8635 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8638 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8639 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8640 memory_access_type
, &gs_info
);
8642 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8643 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
8644 slp_node_instance
, slp_node
, cost_vec
);
8649 gcc_assert (memory_access_type
8650 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8652 if (dump_enabled_p ())
8653 dump_printf_loc (MSG_NOTE
, vect_location
,
8654 "transform load. ncopies = %d\n", ncopies
);
8658 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8659 ensure_base_align (dr_info
);
8661 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8663 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8667 if (memory_access_type
== VMAT_INVARIANT
)
8669 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8670 /* If we have versioned for aliasing or the loop doesn't
8671 have any data dependencies that would preclude this,
8672 then we are sure this is a loop invariant load and
8673 thus we can insert it on the preheader edge. */
8674 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8675 && !nested_in_vect_loop
8676 && hoist_defs_of_uses (stmt_info
, loop
));
8679 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8680 if (dump_enabled_p ())
8681 dump_printf_loc (MSG_NOTE
, vect_location
,
8682 "hoisting out of the vectorized loop: %G", stmt
);
8683 scalar_dest
= copy_ssa_name (scalar_dest
);
8684 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8685 gsi_insert_on_edge_immediate
8686 (loop_preheader_edge (loop
),
8687 gimple_build_assign (scalar_dest
, rhs
));
8689 /* These copies are all equivalent, but currently the representation
8690 requires a separate STMT_VINFO_VEC_STMT for each one. */
8691 prev_stmt_info
= NULL
;
8692 gimple_stmt_iterator gsi2
= *gsi
;
8694 for (j
= 0; j
< ncopies
; j
++)
8696 stmt_vec_info new_stmt_info
;
8699 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8701 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8702 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8706 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8708 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8711 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8713 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8715 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8716 prev_stmt_info
= new_stmt_info
;
8721 if (memory_access_type
== VMAT_ELEMENTWISE
8722 || memory_access_type
== VMAT_STRIDED_SLP
)
8724 gimple_stmt_iterator incr_gsi
;
8730 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8731 tree stride_base
, stride_step
, alias_off
;
8732 /* Checked by get_load_store_type. */
8733 unsigned int const_nunits
= nunits
.to_constant ();
8734 unsigned HOST_WIDE_INT cst_offset
= 0;
8736 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8737 gcc_assert (!nested_in_vect_loop
);
8741 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8742 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8746 first_stmt_info
= stmt_info
;
8747 first_dr_info
= dr_info
;
8749 if (slp
&& grouped_load
)
8751 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8752 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8758 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8759 * vect_get_place_in_interleaving_chain (stmt_info
,
8762 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8766 = fold_build_pointer_plus
8767 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8768 size_binop (PLUS_EXPR
,
8769 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
8770 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8771 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...  */
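      /* For instance (hypothetical numbers): with stride == 3 and a
	 four-element vector type this becomes

	   for (j = 0; ; j += 4 * 3)
	     {
	       tmp0 = array[j];
	       tmp1 = array[j + 3];
	       tmp2 = array[j + 6];
	       tmp3 = array[j + 9];
	       vectemp = {tmp0, tmp1, tmp2, tmp3};
	     }

	 i.e. four element loads feeding one vector CONSTRUCTOR per copy.  */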
8789 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8790 build_int_cst (TREE_TYPE (stride_step
), vf
));
8792 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8794 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8795 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8796 create_iv (stride_base
, ivstep
, NULL
,
8797 loop
, &incr_gsi
, insert_after
,
8799 incr
= gsi_stmt (incr_gsi
);
8800 loop_vinfo
->add_stmt (incr
);
8802 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8804 prev_stmt_info
= NULL
;
8805 running_off
= offvar
;
8806 alias_off
= build_int_cst (ref_type
, 0);
8807 int nloads
= const_nunits
;
8809 tree ltype
= TREE_TYPE (vectype
);
8810 tree lvectype
= vectype
;
8811 auto_vec
<tree
> dr_chain
;
8812 if (memory_access_type
== VMAT_STRIDED_SLP
)
8814 if (group_size
< const_nunits
)
8816 /* First check if vec_init optab supports construction from
8817 vector elts directly. */
8818 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
8820 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
8821 && VECTOR_MODE_P (vmode
)
8822 && targetm
.vector_mode_supported_p (vmode
)
8823 && (convert_optab_handler (vec_init_optab
,
8824 TYPE_MODE (vectype
), vmode
)
8825 != CODE_FOR_nothing
))
8827 nloads
= const_nunits
/ group_size
;
8829 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
	      /* Otherwise avoid emitting a constructor of vector elements
		 by performing the loads using an integer type of the same
		 size, constructing a vector of those and then
		 re-interpreting it as the original vector type.
		 This avoids a huge runtime penalty due to the general
		 inability to perform store forwarding from smaller stores
		 to a larger load.  */
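	      /* E.g. (illustrative only): for groups of two shorts read
		 into a V8HI vector, the loads are done as four SImode
		 loads that build a V4SI which is then punned back:

		   int tmp0 = *(int *) p;
		   int tmp1 = *(int *) (p + stride);
		   int tmp2 = *(int *) (p + 2 * stride);
		   int tmp3 = *(int *) (p + 3 * stride);
		   vectemp = VIEW_CONVERT_EXPR<V8HI> ({tmp0, tmp1, tmp2, tmp3});

		 The pointer and temporary names are hypothetical.  */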
	      unsigned lsize
		= group_size * TYPE_PRECISION (TREE_TYPE (vectype));
	      unsigned int lnunits = const_nunits / group_size;
8843 /* If we can't construct such a vector fall back to
8844 element loads of the original vector type. */
8845 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8846 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
8847 && VECTOR_MODE_P (vmode
)
8848 && targetm
.vector_mode_supported_p (vmode
)
8849 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
8850 != CODE_FOR_nothing
))
8854 ltype
= build_nonstandard_integer_type (lsize
, 1);
8855 lvectype
= build_vector_type (ltype
, nloads
);
8862 lnel
= const_nunits
;
8865 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8867 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8868 else if (nloads
== 1)
8873 /* For SLP permutation support we need to load the whole group,
8874 not only the number of vector stmts the permutation result
8878 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8880 unsigned int const_vf
= vf
.to_constant ();
8881 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8882 dr_chain
.create (ncopies
);
8885 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8887 unsigned int group_el
= 0;
8888 unsigned HOST_WIDE_INT
8889 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8890 for (j
= 0; j
< ncopies
; j
++)
8893 vec_alloc (v
, nloads
);
8894 stmt_vec_info new_stmt_info
= NULL
;
8895 for (i
= 0; i
< nloads
; i
++)
8897 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8898 group_el
* elsz
+ cst_offset
);
8899 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8900 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8902 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8904 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8906 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8907 gimple_assign_lhs (new_stmt
));
8911 || group_el
== group_size
)
8913 tree newoff
= copy_ssa_name (running_off
);
8914 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8915 running_off
, stride_step
);
8916 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
8918 running_off
= newoff
;
8924 tree vec_inv
= build_constructor (lvectype
, v
);
8925 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
8926 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8927 if (lvectype
!= vectype
)
8930 = gimple_build_assign (make_ssa_name (vectype
),
8932 build1 (VIEW_CONVERT_EXPR
,
8933 vectype
, new_temp
));
8935 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8942 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
8944 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8949 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8951 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8952 prev_stmt_info
= new_stmt_info
;
8958 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8959 slp_node_instance
, false, &n_perms
);
8964 if (memory_access_type
== VMAT_GATHER_SCATTER
8965 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
8966 grouped_load
= false;
8970 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8971 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8972 /* For SLP vectorization we directly vectorize a subchain
8973 without permutation. */
8974 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8975 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8976 /* For BB vectorization always use the first stmt to base
8977 the data ref pointer on. */
8979 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8981 /* Check if the chain of loads is already vectorized. */
8982 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
8983 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8984 ??? But we can only do so if there is exactly one
8985 as we have no way to get at the rest. Leave the CSE
8987 ??? With the group load eventually participating
8988 in multiple different permutations (having multiple
8989 slp nodes which refer to the same group) the CSE
8990 is even wrong code. See PR56270. */
8993 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8996 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8999 /* VEC_NUM is the number of vect stmts to be created for this group. */
9002 grouped_load
= false;
9003 /* If an SLP permutation is from N elements to N elements,
9004 and if one vector holds a whole number of N, we can load
9005 the inputs to the permutation in the same way as an
9006 unpermuted sequence. In other cases we need to load the
9007 whole group, not only the number of vector stmts the
9008 permutation result fits in. */
9010 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
9011 || !multiple_p (nunits
, group_size
)))
9013 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9014 variable VF; see vect_transform_slp_perm_load. */
9015 unsigned int const_vf
= vf
.to_constant ();
9016 unsigned int const_nunits
= nunits
.to_constant ();
9017 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9018 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9022 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9024 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
9028 vec_num
= group_size
;
9030 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9034 first_stmt_info
= stmt_info
;
9035 first_dr_info
= dr_info
;
9036 group_size
= vec_num
= 1;
9038 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9041 alignment_support_scheme
9042 = vect_supportable_dr_alignment (first_dr_info
, false);
9043 gcc_assert (alignment_support_scheme
);
9044 vec_loop_masks
*loop_masks
9045 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9046 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9048 /* Targets with store-lane instructions must not require explicit
9049 realignment. vect_supportable_dr_alignment always returns either
9050 dr_aligned or dr_unaligned_supported for masked operations. */
9051 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9054 || alignment_support_scheme
== dr_aligned
9055 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
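  /* A hypothetical source-level loop that leads to such an interleaved
     (group size two) load, for illustration only:

        for (i = 0; i < n; i++)
          {
            re[i] = in[2*i];
            im[i] = in[2*i + 1];
          }

     Two contiguous vector loads of in[] are emitted per vector iteration
     and their elements are de-interleaved by the even/odd VEC_PERM_EXPRs
     shown above.  */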
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
9160 if (nested_in_vect_loop
9161 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9162 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9164 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9165 compute_in_loop
= true;
9168 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9169 || alignment_support_scheme
== dr_explicit_realign
)
9170 && !compute_in_loop
)
9172 msq
= vect_setup_realignment (first_stmt_info
, gsi
, &realignment_token
,
9173 alignment_support_scheme
, NULL_TREE
,
9175 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9177 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9178 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9185 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9186 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9189 tree vec_offset
= NULL_TREE
;
9190 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9192 aggr_type
= NULL_TREE
;
9195 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9197 aggr_type
= elem_type
;
9198 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9199 &bump
, &vec_offset
);
9203 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9204 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9206 aggr_type
= vectype
;
9207 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
9208 memory_access_type
);
9211 tree vec_mask
= NULL_TREE
;
9212 prev_stmt_info
= NULL
;
9213 poly_uint64 group_elt
= 0;
9214 for (j
= 0; j
< ncopies
; j
++)
9216 stmt_vec_info new_stmt_info
= NULL
;
9217 /* 1. Create the vector or array pointer update chain. */
9220 bool simd_lane_access_p
9221 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9222 if (simd_lane_access_p
9223 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9224 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9225 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
9226 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9227 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9228 get_alias_set (TREE_TYPE (ref_type
)))
9229 && (alignment_support_scheme
== dr_aligned
9230 || alignment_support_scheme
== dr_unaligned_supported
))
9232 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9233 dataref_offset
= build_int_cst (ref_type
, 0);
9235 else if (first_stmt_info_for_drptr
9236 && first_stmt_info
!= first_stmt_info_for_drptr
)
9239 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
9240 aggr_type
, at_loop
, offset
, &dummy
,
9241 gsi
, &ptr_incr
, simd_lane_access_p
,
9243 /* Adjust the pointer by the difference to first_stmt. */
9244 data_reference_p ptrdr
9245 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9247 = fold_convert (sizetype
,
9248 size_binop (MINUS_EXPR
,
9249 DR_INIT (first_dr_info
->dr
),
9251 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9254 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9255 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
9256 &dataref_ptr
, &vec_offset
);
9259 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
9260 offset
, &dummy
, gsi
, &ptr_incr
,
9267 auto_vec
<tree
> ops (1);
9268 auto_vec
<vec
<tree
> > vec_defs (1);
9269 ops
.quick_push (mask
);
9270 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9271 vec_mask
= vec_defs
[0][0];
9274 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
9281 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9283 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9284 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9286 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9289 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9292 if (grouped_load
|| slp_perm
)
9293 dr_chain
.create (vec_num
);
9295 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9299 vec_array
= create_vector_array (vectype
, vec_num
);
9301 tree final_mask
= NULL_TREE
;
9303 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9306 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9313 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9315 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9316 tree alias_ptr
= build_int_cst (ref_type
, align
);
9317 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9318 dataref_ptr
, alias_ptr
,
9324 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9325 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9326 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9328 gimple_call_set_lhs (call
, vec_array
);
9329 gimple_call_set_nothrow (call
, true);
9330 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
9332 /* Extract each vector into an SSA_NAME. */
9333 for (i
= 0; i
< vec_num
; i
++)
9335 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
9337 dr_chain
.quick_push (new_temp
);
9340 /* Record the mapping between SSA_NAMEs and statements. */
9341 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
9343 /* Record that VEC_ARRAY is now dead. */
9344 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
9348 for (i
= 0; i
< vec_num
; i
++)
9350 tree final_mask
= NULL_TREE
;
9352 && memory_access_type
!= VMAT_INVARIANT
)
9353 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9355 vectype
, vec_num
* j
+ i
);
9357 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9361 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9364 /* 2. Create the vector-load in the loop. */
9365 gimple
*new_stmt
= NULL
;
9366 switch (alignment_support_scheme
)
9369 case dr_unaligned_supported
:
9371 unsigned int misalign
;
9372 unsigned HOST_WIDE_INT align
;
9374 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9376 tree scale
= size_int (gs_info
.scale
);
9379 call
= gimple_build_call_internal
9380 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
9381 vec_offset
, scale
, final_mask
);
9383 call
= gimple_build_call_internal
9384 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
9386 gimple_call_set_nothrow (call
, true);
9388 data_ref
= NULL_TREE
;
9393 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9394 if (alignment_support_scheme
== dr_aligned
)
9396 gcc_assert (aligned_access_p (first_dr_info
));
9399 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9401 align
= dr_alignment
9402 (vect_dr_behavior (first_dr_info
));
9406 misalign
= DR_MISALIGNMENT (first_dr_info
);
9407 if (dataref_offset
== NULL_TREE
9408 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9409 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9414 align
= least_bit_hwi (misalign
| align
);
9415 tree ptr
= build_int_cst (ref_type
, align
);
9417 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9420 gimple_call_set_nothrow (call
, true);
9422 data_ref
= NULL_TREE
;
9426 tree ltype
= vectype
;
9427 /* If there's no peeling for gaps but we have a gap
9428 with slp loads then load the lower half of the
9429 vector only. See get_group_load_store_type for
9430 when we apply this optimization. */
9433 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9434 && DR_GROUP_GAP (first_stmt_info
) != 0
9435 && known_eq (nunits
,
9437 - DR_GROUP_GAP (first_stmt_info
)) * 2)
9438 && known_eq (nunits
, group_size
))
9439 ltype
= build_vector_type (TREE_TYPE (vectype
),
9442 (first_stmt_info
)));
9444 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
,
9447 : build_int_cst (ref_type
, 0));
9448 if (alignment_support_scheme
== dr_aligned
)
9450 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9451 TREE_TYPE (data_ref
)
9452 = build_aligned_type (TREE_TYPE (data_ref
),
9453 align
* BITS_PER_UNIT
);
9455 TREE_TYPE (data_ref
)
9456 = build_aligned_type (TREE_TYPE (data_ref
),
9457 TYPE_ALIGN (elem_type
));
9458 if (ltype
!= vectype
)
9460 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9461 tree tem
= make_ssa_name (ltype
);
9462 new_stmt
= gimple_build_assign (tem
, data_ref
);
9463 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9465 vec
<constructor_elt
, va_gc
> *v
;
9467 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9468 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9469 build_zero_cst (ltype
));
9471 = gimple_build_assign (vec_dest
,
9478 case dr_explicit_realign
:
9482 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9484 if (compute_in_loop
)
9485 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
9487 dr_explicit_realign
,
9490 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9491 ptr
= copy_ssa_name (dataref_ptr
);
9493 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9494 // For explicit realign the target alignment should be
9495 // known at compile time.
9496 unsigned HOST_WIDE_INT align
=
9497 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9498 new_stmt
= gimple_build_assign
9499 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9501 (TREE_TYPE (dataref_ptr
),
9502 -(HOST_WIDE_INT
) align
));
9503 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9505 = build2 (MEM_REF
, vectype
, ptr
,
9506 build_int_cst (ref_type
, 0));
9507 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9508 vec_dest
= vect_create_destination_var (scalar_dest
,
9510 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9511 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9512 gimple_assign_set_lhs (new_stmt
, new_temp
);
9513 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9514 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9517 bump
= size_binop (MULT_EXPR
, vs
,
9518 TYPE_SIZE_UNIT (elem_type
));
9519 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9520 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
9522 new_stmt
= gimple_build_assign
9523 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9525 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9526 ptr
= copy_ssa_name (ptr
, new_stmt
);
9527 gimple_assign_set_lhs (new_stmt
, ptr
);
9528 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9530 = build2 (MEM_REF
, vectype
, ptr
,
9531 build_int_cst (ref_type
, 0));
9534 case dr_explicit_realign_optimized
:
9536 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9537 new_temp
= copy_ssa_name (dataref_ptr
);
9539 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9540 // We should only be doing this if we know the target
9541 // alignment at compile time.
9542 unsigned HOST_WIDE_INT align
=
9543 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9544 new_stmt
= gimple_build_assign
9545 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9546 build_int_cst (TREE_TYPE (dataref_ptr
),
9547 -(HOST_WIDE_INT
) align
));
9548 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9550 = build2 (MEM_REF
, vectype
, new_temp
,
9551 build_int_cst (ref_type
, 0));
9557 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9558 /* DATA_REF is null if we've already built the statement. */
9561 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9562 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9564 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9565 gimple_set_lhs (new_stmt
, new_temp
);
9567 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9569 /* 3. Handle explicit realignment if necessary/supported.
9571 vec_dest = realign_load (msq, lsq, realignment_token) */
9572 if (alignment_support_scheme
== dr_explicit_realign_optimized
9573 || alignment_support_scheme
== dr_explicit_realign
)
9575 lsq
= gimple_assign_lhs (new_stmt
);
9576 if (!realignment_token
)
9577 realignment_token
= dataref_ptr
;
9578 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9579 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9580 msq
, lsq
, realignment_token
);
9581 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9582 gimple_assign_set_lhs (new_stmt
, new_temp
);
9584 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9586 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9589 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9590 add_phi_arg (phi
, lsq
,
9591 loop_latch_edge (containing_loop
),
9597 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9599 tree perm_mask
= perm_mask_for_reverse (vectype
);
9600 new_temp
= permute_vec_elements (new_temp
, new_temp
,
9601 perm_mask
, stmt_info
, gsi
);
9602 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9605 /* Collect vector loads and later create their permutation in
9606 vect_transform_grouped_load (). */
9607 if (grouped_load
|| slp_perm
)
9608 dr_chain
.quick_push (new_temp
);
9610 /* Store vector loads in the corresponding SLP_NODE. */
9611 if (slp
&& !slp_perm
)
9612 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9614 /* With SLP permutation we load the gaps as well, without
9615 we need to skip the gaps after we manage to fully load
9616 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9617 group_elt
+= nunits
;
9618 if (maybe_ne (group_gap_adj
, 0U)
9620 && known_eq (group_elt
, group_size
- group_gap_adj
))
9622 poly_wide_int bump_val
9623 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9625 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9626 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9631 /* Bump the vector pointer to account for a gap or for excess
9632 elements loaded for a permuted SLP load. */
9633 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9635 poly_wide_int bump_val
9636 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9638 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9639 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9644 if (slp
&& !slp_perm
)
9650 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9651 slp_node_instance
, false,
9654 dr_chain
.release ();
9662 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9663 vect_transform_grouped_load (stmt_info
, dr_chain
,
9665 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9670 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9672 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9673 prev_stmt_info
= new_stmt_info
;
9676 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo,
		     tree *comp_vectype, enum vect_def_type *dts,
		     tree vectype)
{
  tree lhs, rhs;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
    {
      if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
	  || !*comp_vectype
	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
	return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
	   || TREE_CODE (lhs) == FIXED_CST)
    dts[0] = vect_constant_def;
  else
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
	   || TREE_CODE (rhs) == FIXED_CST)
    dts[1] = vect_constant_def;
  else
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison.  */
  if (! *comp_vectype)
    {
      tree scalar_type = TREE_TYPE (lhs);
      /* If we can widen the comparison to match vectype do so.  */
      if (INTEGRAL_TYPE_P (scalar_type)
	  && vectype
	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
			      TYPE_SIZE (TREE_TYPE (vectype))))
	scalar_type = build_nonstandard_integer_type
	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
	   TYPE_UNSIGNED (scalar_type));
      *comp_vectype = get_vectype_for_scalar_type (scalar_type);
    }

  return true;
}
/* vectorizable_condition.

   Check if STMT_INFO is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
9778 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9779 stmt_vec_info
*vec_stmt
, bool for_reduction
,
9780 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9782 vec_info
*vinfo
= stmt_info
->vinfo
;
9783 tree scalar_dest
= NULL_TREE
;
9784 tree vec_dest
= NULL_TREE
;
9785 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
9786 tree then_clause
, else_clause
;
9787 tree comp_vectype
= NULL_TREE
;
9788 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
9789 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
9792 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9793 enum vect_def_type dts
[4]
9794 = {vect_unknown_def_type
, vect_unknown_def_type
,
9795 vect_unknown_def_type
, vect_unknown_def_type
};
9798 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9799 stmt_vec_info prev_stmt_info
= NULL
;
9801 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9802 vec
<tree
> vec_oprnds0
= vNULL
;
9803 vec
<tree
> vec_oprnds1
= vNULL
;
9804 vec
<tree
> vec_oprnds2
= vNULL
;
9805 vec
<tree
> vec_oprnds3
= vNULL
;
9807 bool masked
= false;
9809 if (for_reduction
&& STMT_SLP_TYPE (stmt_info
))
9812 vect_reduction_type reduction_type
9813 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
9814 if (reduction_type
== TREE_CODE_REDUCTION
)
9816 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9819 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9820 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9824 /* FORNOW: not yet supported. */
9825 if (STMT_VINFO_LIVE_P (stmt_info
))
9827 if (dump_enabled_p ())
9828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9829 "value used after loop.\n");
9834 /* Is vectorizable conditional operation? */
9835 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9839 code
= gimple_assign_rhs_code (stmt
);
9841 if (code
!= COND_EXPR
)
9844 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9845 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9850 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9852 gcc_assert (ncopies
>= 1);
9853 if (for_reduction
&& ncopies
> 1)
9854 return false; /* FORNOW */
9856 cond_expr
= gimple_assign_rhs1 (stmt
);
9857 then_clause
= gimple_assign_rhs2 (stmt
);
9858 else_clause
= gimple_assign_rhs3 (stmt
);
9860 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
9861 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
9865 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
9867 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
9870 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
9873 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
9876 masked
= !COMPARISON_CLASS_P (cond_expr
);
9877 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
9879 if (vec_cmp_type
== NULL_TREE
)
9882 cond_code
= TREE_CODE (cond_expr
);
9885 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
9886 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
9889 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
9891 /* Boolean values may have another representation in vectors
9892 and therefore we prefer bit operations over comparison for
9893 them (which also works for scalar masks). We store opcodes
9894 to use in bitop1 and bitop2. Statement is vectorized as
9895 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9896 depending on bitop1 and bitop2 arity. */
9900 bitop1
= BIT_NOT_EXPR
;
9901 bitop2
= BIT_AND_EXPR
;
9904 bitop1
= BIT_NOT_EXPR
;
9905 bitop2
= BIT_IOR_EXPR
;
9908 bitop1
= BIT_NOT_EXPR
;
9909 bitop2
= BIT_AND_EXPR
;
9910 std::swap (cond_expr0
, cond_expr1
);
9913 bitop1
= BIT_NOT_EXPR
;
9914 bitop2
= BIT_IOR_EXPR
;
9915 std::swap (cond_expr0
, cond_expr1
);
9918 bitop1
= BIT_XOR_EXPR
;
9921 bitop1
= BIT_XOR_EXPR
;
9922 bitop2
= BIT_NOT_EXPR
;
9927 cond_code
= SSA_NAME
;
9932 if (bitop1
!= NOP_EXPR
)
9934 machine_mode mode
= TYPE_MODE (comp_vectype
);
9937 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
9938 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9941 if (bitop2
!= NOP_EXPR
)
9943 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
9945 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9949 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
9952 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
9953 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
9964 vec_oprnds0
.create (1);
9965 vec_oprnds1
.create (1);
9966 vec_oprnds2
.create (1);
9967 vec_oprnds3
.create (1);
9971 scalar_dest
= gimple_assign_lhs (stmt
);
9972 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
9973 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9975 /* Handle cond expr. */
9976 for (j
= 0; j
< ncopies
; j
++)
9978 stmt_vec_info new_stmt_info
= NULL
;
9983 auto_vec
<tree
, 4> ops
;
9984 auto_vec
<vec
<tree
>, 4> vec_defs
;
9987 ops
.safe_push (cond_expr
);
9990 ops
.safe_push (cond_expr0
);
9991 ops
.safe_push (cond_expr1
);
9993 ops
.safe_push (then_clause
);
9994 ops
.safe_push (else_clause
);
9995 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9996 vec_oprnds3
= vec_defs
.pop ();
9997 vec_oprnds2
= vec_defs
.pop ();
9999 vec_oprnds1
= vec_defs
.pop ();
10000 vec_oprnds0
= vec_defs
.pop ();
10007 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
10013 = vect_get_vec_def_for_operand (cond_expr0
,
10014 stmt_info
, comp_vectype
);
10016 = vect_get_vec_def_for_operand (cond_expr1
,
10017 stmt_info
, comp_vectype
);
10019 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
10021 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10022 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
10029 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10032 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10034 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10035 vec_oprnds2
.pop ());
10036 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10037 vec_oprnds3
.pop ());
10042 vec_oprnds0
.quick_push (vec_cond_lhs
);
10044 vec_oprnds1
.quick_push (vec_cond_rhs
);
10045 vec_oprnds2
.quick_push (vec_then_clause
);
10046 vec_oprnds3
.quick_push (vec_else_clause
);
10049 /* Arguments are ready. Create the new vector stmt. */
10050 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10052 vec_then_clause
= vec_oprnds2
[i
];
10053 vec_else_clause
= vec_oprnds3
[i
];
10056 vec_compare
= vec_cond_lhs
;
10059 vec_cond_rhs
= vec_oprnds1
[i
];
10060 if (bitop1
== NOP_EXPR
)
10061 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10062 vec_cond_lhs
, vec_cond_rhs
);
10065 new_temp
= make_ssa_name (vec_cmp_type
);
10067 if (bitop1
== BIT_NOT_EXPR
)
10068 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10072 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10074 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10075 if (bitop2
== NOP_EXPR
)
10076 vec_compare
= new_temp
;
10077 else if (bitop2
== BIT_NOT_EXPR
)
10079 /* Instead of doing ~x ? y : z do x ? z : y. */
10080 vec_compare
= new_temp
;
10081 std::swap (vec_then_clause
, vec_else_clause
);
10085 vec_compare
= make_ssa_name (vec_cmp_type
);
10087 = gimple_build_assign (vec_compare
, bitop2
,
10088 vec_cond_lhs
, new_temp
);
10089 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10093 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10095 if (!is_gimple_val (vec_compare
))
10097 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10098 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10100 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10101 vec_compare
= vec_compare_name
;
10103 gcall
*new_stmt
= gimple_build_call_internal
10104 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10106 gimple_call_set_lhs (new_stmt
, scalar_dest
);
10107 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
10108 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
10109 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
10112 /* In this case we're moving the definition to later in the
10113 block. That doesn't matter because the only uses of the
10114 lhs are in phi statements. */
10115 gimple_stmt_iterator old_gsi
10116 = gsi_for_stmt (stmt_info
->stmt
);
10117 gsi_remove (&old_gsi
, true);
10119 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10124 new_temp
= make_ssa_name (vec_dest
);
10126 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10127 vec_then_clause
, vec_else_clause
);
10129 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10132 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10139 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10141 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10143 prev_stmt_info
= new_stmt_info
;
10146 vec_oprnds0
.release ();
10147 vec_oprnds1
.release ();
10148 vec_oprnds2
.release ();
10149 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         stmt_vec_info *vec_stmt,
                         slp_tree slp_node, stmt_vector_for_cost *cost_vec)

  vec_info *vinfo = stmt_info->vinfo;
  tree lhs, rhs1, rhs2;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  poly_uint64 nunits;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)

  if (STMT_VINFO_LIVE_P (stmt_info))
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  code = gimple_assign_rhs_code (stmt);
  if (TREE_CODE_CLASS (code) != tcc_comparison)

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
                   TYPE_VECTOR_SUBPARTS (vectype2)))

  vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison.  */
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  bool swap_p = false;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
      if (code == GT_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
      else if (code == GE_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
      else if (code == LT_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
      else if (code == LE_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
          bitop1 = BIT_XOR_EXPR;
          if (code == EQ_EXPR)
            bitop2 = BIT_NOT_EXPR;

      if (bitop1 == NOP_EXPR)
          if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
          machine_mode mode = TYPE_MODE (vectype);
          optab = optab_for_tree_code (bitop1, vectype, optab_default);
          if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)

          if (bitop2 != NOP_EXPR)
              optab = optab_for_tree_code (bitop2, vectype, optab_default);
              if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)

      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
                              dts, ndts, slp_node, cost_vec);

      vec_oprnds0.create (1);
      vec_oprnds1.create (1);

  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
      stmt_vec_info new_stmt_info = NULL;
              auto_vec<tree, 2> ops;
              auto_vec<vec<tree>, 2> vec_defs;

              ops.safe_push (rhs1);
              ops.safe_push (rhs2);
              vect_get_slp_defs (ops, slp_node, &vec_defs);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
                std::swap (vec_oprnds0, vec_oprnds1);

              vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
              vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
          vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
                                                     vec_oprnds0.pop ());
          vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
                                                     vec_oprnds1.pop ());

          if (swap_p && j == 0)
            std::swap (vec_rhs1, vec_rhs2);
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
          vec_rhs2 = vec_oprnds1[i];

          new_temp = make_ssa_name (mask);
          if (bitop1 == NOP_EXPR)
              gassign *new_stmt = gimple_build_assign (new_temp, code,
                                                       vec_rhs1, vec_rhs2);
                = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              if (bitop1 == BIT_NOT_EXPR)
                new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
                new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
                = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              if (bitop2 != NOP_EXPR)
                  tree res = make_ssa_name (mask);
                  if (bitop2 == BIT_NOT_EXPR)
                    new_stmt = gimple_build_assign (res, bitop2, new_temp);
                    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
                    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);

        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;

  vec_oprnds0.release ();
  vec_oprnds1.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                          slp_tree slp_node, stmt_vec_info *vec_stmt,
                          stmt_vector_for_cost *cost_vec)

      stmt_vec_info slp_stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
          if (STMT_VINFO_LIVE_P (slp_stmt_info)
              && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
                                               vec_stmt, cost_vec))
  else if (STMT_VINFO_LIVE_P (stmt_info)
           && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
                                            vec_stmt, cost_vec))
/* Make sure the statement is vectorizable.  */

vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
                   slp_tree node, slp_instance node_instance,
                   stmt_vector_for_cost *cost_vec)

  vec_info *vinfo = stmt_info->vinfo;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",

  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
                                   " stmt has volatile operands: %G\n",

  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
          stmt_vec_info pattern_def_stmt_info
            = vinfo->lookup_stmt (gsi_stmt (si));
          if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
              || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "==> examining pattern def statement: %G",
                                 pattern_def_stmt_info->stmt);
                = vect_analyze_stmt (pattern_def_stmt_info,
                                     need_to_vectorize, node, node_instance,

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; don't analyze pattern stmts instead, the pattern stmts
     already will be part of the SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt_info
          && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
              || STMT_VINFO_LIVE_P (pattern_stmt_info)))
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt_info = pattern_stmt_info;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "==> examining pattern statement: %G",
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
          return opt_result::success ();
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && pattern_stmt_info
           && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
               || STMT_VINFO_LIVE_P (pattern_stmt_info)))
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "==> examining pattern statement: %G",
                         pattern_stmt_info->stmt);
        = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
                             node_instance, cost_vec);

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
      case vect_internal_def:
      case vect_reduction_def:
      case vect_nested_cycle:
        gcc_assert (!bb_vinfo
                    && (relevance == vect_used_in_outer
                        || relevance == vect_used_in_outer_by_reduction
                        || relevance == vect_used_by_reduction
                        || relevance == vect_unused_in_scope
                        || relevance == vect_used_only_live));
      case vect_induction_def:
        gcc_assert (!bb_vinfo);
      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
        gcc_unreachable ();

  if (STMT_VINFO_RELEVANT_P (stmt_info))
      tree type = gimple_expr_type (stmt_info->stmt);
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;

  if (PURE_SLP_STMT (stmt_info) && !node)
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "handled only by SLP analysis\n");
      return opt_result::success ();

      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
          || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
          || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_reduction (stmt_info, NULL, NULL, node,
                                     node_instance, cost_vec)
          || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_condition (stmt_info, NULL, NULL, false, node,
          || vectorizable_comparison (stmt_info, NULL, NULL, node,
    ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
          || vectorizable_conversion (stmt_info, NULL, NULL, node,
          || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_assignment (stmt_info, NULL, NULL, node,
          || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
          || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_condition (stmt_info, NULL, NULL, false, node,
          || vectorizable_comparison (stmt_info, NULL, NULL, node,

    return opt_result::failure_at (stmt_info->stmt,
                                   " relevant stmt not supported: %G",

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
                                   " live stmt not supported: %G",

  return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at BSI.  */

vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                     slp_tree slp_node, slp_instance slp_node_instance)

  vec_info *vinfo = stmt_info->vinfo;
  bool is_store = false;
  stmt_vec_info vec_stmt = NULL;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);

  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
                   && nested_in_vect_loop_p
                        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),

  gimple *stmt = stmt_info->stmt;
  switch (STMT_VINFO_TYPE (stmt_info))
      case type_demotion_vec_info_type:
      case type_promotion_vec_info_type:
      case type_conversion_vec_info_type:
        done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,

      case induc_vec_info_type:
        done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,

      case shift_vec_info_type:
        done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);

      case op_vec_info_type:
        done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,

      case assignment_vec_info_type:
        done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,

      case load_vec_info_type:
        done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
                                  slp_node_instance, NULL);

      case store_vec_info_type:
        done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
        if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
            /* In case of interleaving, the whole chain is vectorized when the
               last store in the chain is reached.  Store stmts before the last
               one are skipped, and their vec_stmt_info shouldn't be freed
            stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
            if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))

      case condition_vec_info_type:
        done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,

      case comparison_vec_info_type:
        done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,

      case call_vec_info_type:
        done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
        stmt = gsi_stmt (*gsi);

      case call_simd_clone_vec_info_type:
        done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
        stmt = gsi_stmt (*gsi);

      case reduc_vec_info_type:
        done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
                                       slp_node_instance, NULL);

        if (!STMT_VINFO_LIVE_P (stmt_info))
            if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "stmt not supported.\n");
            gcc_unreachable ();

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
    gcc_assert (!vec_stmt
                && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
             vect_used_in_outer_by_reduction))
      class loop *innerloop = LOOP_VINFO_LOOP (
        STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
        scalar_dest = gimple_get_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            stmt_vec_info exit_phi_info
              = vinfo->lookup_stmt (USE_STMT (use_p));
            STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
      done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,

    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

vect_remove_stores (stmt_vec_info first_stmt_info)

  vec_info *vinfo = first_stmt_info->vinfo;
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)

  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
           || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))

  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
             (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
poly_uint64 current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

get_vectype_for_scalar_type (tree scalar_type)

  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
      && known_eq (current_vector_size, 0U))
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

get_mask_type_for_scalar_type (tree scalar_type)

  tree vectype = get_vectype_for_scalar_type (scalar_type);

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
                                  current_vector_size);
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

get_same_sized_vectype (tree scalar_type, tree vector_type)

  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
/* Function vect_is_simple_use.

   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
                    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)

  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (operand))
        dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
        dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;

      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
        *dt = vect_external_def;
          stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
          def_stmt = stmt_vinfo->stmt;
          switch (gimple_code (def_stmt))
            case GIMPLE_ASSIGN:
              *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
              *dt = vect_unknown_def_type;
          if (def_stmt_info_out)
            *def_stmt_info_out = stmt_vinfo;
        *def_stmt_out = def_stmt;

  if (dump_enabled_p ())
      dump_printf (MSG_NOTE, ", type of def: ");
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");

  if (*dt == vect_unknown_def_type)
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the

vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
                    tree *vectype, stmt_vec_info *def_stmt_info_out,
                    gimple **def_stmt_out)

  stmt_vec_info def_stmt_info;

  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))

    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_is_simple_use: vectype %T\n", *vectype);
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
    gcc_unreachable ();
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)

  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
    vect_loop = LOOP_VINFO_LOOP (loop_info);

    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt_info)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt_info, vectype_out,
                                             vectype_in, code1, code2,
                                             multi_step_cvt, interm_types))
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have the
             same operation.  One such an example is s += a * b, where elements
             in a and b cannot be reordered.  Here we check if the vector defined
             by STMT is only directly used in the reduction statement.  */
          tree lhs = gimple_assign_lhs (stmt_info->stmt);
          stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;

      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;

      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;

      gcc_unreachable ();

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
  else if (CONVERT_EXPR_CODE_P (code)
           && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
           && VECTOR_BOOLEAN_TYPE_P (vectype)
           && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
           && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
      /* If the input and result modes are the same, a different optab
         is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);

  if (!optab1 || !optab2)

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
          intermediate_type = vect_halve_mask_nunits (prev_type);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
          && SCALAR_INT_MODE_P (prev_mode))
          /* If the input and result modes are the same, a different optab
             is needed where we pass in the number of units in vectype.  */
          optab3 = vec_unpacks_sbool_lo_optab;
          optab4 = vec_unpacks_sbool_hi_optab;
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
                        TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;

  interm_types->release ();
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)

  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;

  *multi_step_cvt = 0;

      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
          && VECTOR_BOOLEAN_TYPE_P (vectype)
          && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
          && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
        optab1 = vec_pack_sbool_trunc_optab;
        optab1 = optab_for_tree_code (c1, vectype, optab_default);

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);

      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);

      gcc_unreachable ();

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
                    TYPE_VECTOR_SUBPARTS (narrow_vectype)))

  if (code == FLOAT_EXPR)

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
      enum insn_code icode2;
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
          optab1 = interm_optab;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
          intermediate_type = vect_double_mask_nunits (prev_type);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
          && SCALAR_INT_MODE_P (prev_mode))
        interm_optab = vec_pack_sbool_trunc_optab;
          = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
                        TYPE_VECTOR_SUBPARTS (narrow_vectype)))

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;

  interm_types->release ();
/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

vect_gen_while (tree mask, tree start_index, tree end_index)

  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
                                                       cmp_type, mask_type,
                                                       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
                                            start_index, end_index,
                                            build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,

  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - boolean_type_node if the statement is a boolean operation whose
       vector type can only be determined once all the other vector types
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
                                tree *stmt_vectype_out,
                                tree *nunits_vectype_out)

  gimple *stmt = stmt_info->stmt;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
      if (is_a <gcall *> (stmt))
          /* Ignore calls with no lhs.  These must be calls to
             #pragma omp simd functions, and what vectorization factor
             it really needs can't be determined until
             vectorizable_simd_clone_call.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "defer to SIMD clone analysis.\n");
          return opt_result::success ();
      return opt_result::failure_at (stmt,
                                     "not vectorized: irregular stmt.%G", stmt);

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    return opt_result::failure_at (stmt,
                                   "not vectorized: vector stmt in loop:%G",

  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);

      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
         For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
          && is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) != COND_EXPR)
          *stmt_vectype_out = boolean_type_node;

          tree rhs1 = gimple_assign_rhs1 (stmt);
          if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
              && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
            scalar_type = TREE_TYPE (rhs1);
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "pure bool operation.\n");
              return opt_result::success ();

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "get vectype for scalar type: %T\n", scalar_type);
      vectype = get_vectype_for_scalar_type (scalar_type);
        return opt_result::failure_at (stmt,
                                       " unsupported data-type %T\n",

      if (!*stmt_vectype_out)
        *stmt_vectype_out = vectype;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
      /* The number of units is set according to the smallest scalar
         type (or the largest vector size, but we only support one
         vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
          HOST_WIDE_INT dummy;
          scalar_type = vect_get_smallest_scalar_type (stmt_info,
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "get vectype for scalar type: %T\n", scalar_type);
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);

  if (!nunits_vectype)
    return opt_result::failure_at (stmt,
                                   "not vectorized: unsupported data-type %T\n",

  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
                GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    return opt_result::failure_at (stmt,
                                   "not vectorized: different sized vector "
                                   "types in statement, %T and %T\n",
                                   vectype, nunits_vectype);

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
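
/* Illustrative sketch (not part of the vectorizer, guarded out): why the
   number of units is taken from the smallest scalar type as noted above.
   In the hypothetical loop below the char load and the int store must be
   covered by the same vectorization factor, so with a single vector size
   the 1-byte element type yields the larger, and therefore binding,
   number of lanes.  */
#if 0
static void
mixed_width_example (const signed char *a, int *b, int n)
{
  for (int i = 0; i < n; ++i)
    b[i] = a[i];   /* nunits determined by the 1-byte element type  */
}
#endif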
/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)

  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

        return opt_tree::failure_at (stmt,
                                     "not vectorized: unsupported mask\n");

      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
          if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
            return opt_tree::failure_at (stmt,
                                         "not vectorized: can't compute mask"
                                         " type for statement, %G", stmt);

          /* No vectype probably means external definition.
             Allow it in case there is another operand which
             allows us to determine the mask type.  */

            mask_type = vectype;
          else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
                             TYPE_VECTOR_SUBPARTS (vectype)))
            return opt_tree::failure_at (stmt,
                                         "not vectorized: different sized mask"
                                         " types in statement, %T and %T\n",
                                         mask_type, vectype);
          else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
                   != VECTOR_BOOLEAN_TYPE_P (vectype))
            return opt_tree::failure_at (stmt,
                                         "not vectorized: mixed mask and "
                                         "nonmask vector types in statement, "
                                         mask_type, vectype);

  /* We may compare boolean value loaded as vector of integers.
     Fix mask_type in such case.  */
      && !VECTOR_BOOLEAN_TYPE_P (mask_type)
      && gimple_code (stmt) == GIMPLE_ASSIGN
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
    mask_type = build_same_sized_truth_vector_type (mask_type);

  /* No mask_type should mean loop invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
    return opt_tree::failure_at (stmt,
                                 "not vectorized: can't compute mask type "
                                 "for statement: %G", stmt);

  return opt_tree::success (mask_type);