/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
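
/* Example (for illustration only): a typical caller costs NCOPIES copies
   of a vector statement in the loop body with
     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);
   as the cost-model helpers later in this file do.  */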
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
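
/* Example (for illustration only): given

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   // has a vdef -> *relevant is set
         s = a[i];          // s used after the loop -> *live_p is set
       }

   the store is relevant because it alters memory, while a statement whose
   only effect is a value used after the loop is merely live.  */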
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
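
/* Example (for illustration only): for the store "a[i_1] = x_2", a USE of
   x_2 is the copied operand, so the function returns true; a USE of i_1
   only feeds the array index, so the function returns false.  */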
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  def_bb = gimple_bb (dstmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     DSTMT_VINFO must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
     check that everything is as expected, and we are done.  */
  bb = gimple_bb (stmt_vinfo->stmt);
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}
/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
                               unsigned opno, enum vect_def_type dt,
                               stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ??? We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
                                 opno))
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          /* ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          prologue_cost += record_stmt_cost
              (cost_vec, 1,
               dt == vect_external_def
               ? (elt ? scalar_to_vec : vec_construct)
               : vector_load,
               stmt_info, 0, vect_prologue);
          nelt = 0;
        }
    }

  return prologue_cost;
}
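
/* Example (for illustration only): when every lane of the SLP node uses the
   same external operand, ELT stays nonnull and each vector is costed as a
   single scalar_to_vec (a splat); differing external lanes are costed as
   vec_construct, and constant operands as a vector_load from the constant
   pool, matching the selection made in the call above.  */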
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
         ??? This over-estimates cost for multiple uses and should be
         re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
        {
          tree op = gimple_op (stmt, i);
          enum vect_def_type dt;
          if (!op || op == lhs)
            continue;
          if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
              && (dt == vect_constant_def || dt == vect_external_def))
            prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
                                                            i, dt, cost_vec);
        }
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr,
                                    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
                                       vec_promote_demote, stmt_info, 0,
                                       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
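
/* Worked example (for illustration only, based on the loop above): with
   PWR == 0 a single-step demotion records vect_pow2 (0) == 1
   vec_promote_demote operation and a single-step promotion records
   vect_pow2 (1) == 2; each additional step adds the next power of two.  */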
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        def = SSA_NAME_DEF_STMT (gimple_vuse (def));
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
        prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
                                                        1, dt, cost_vec);
      else
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
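
/* Example (for illustration only): storing the vectorized result into a
   value that the function returns in two general-purpose registers gives
   REG_P (reg) and nregs == 2, so the code above adds NCOPIES vector_store
   (spill) costs and NCOPIES * 2 scalar_load costs to the epilogue.  */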
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr_info),
                                          vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
                                    slp_vf, instance, true,
                                    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
           i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
        bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
        {
          if (i % assumed_nunits == 0)
            {
              if (load_seen)
                ncopies++;
              load_seen = false;
            }
          if (bitmap_bit_p (perm, i))
            load_seen = true;
        }
      if (load_seen)
        ncopies++;
      gcc_assert (ncopies
                  <= (DR_GROUP_SIZE (first_stmt_info)
                      - DR_GROUP_GAP (first_stmt_info)
                      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
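
/* Example (for illustration only): for DR_GROUP_SIZE == 4,
   assumed_nunits == 4 and an SLP load permutation that only reads lanes
   0 and 2, the bitmap scan above leaves ncopies == 1, i.e. a single
   vector load serves the whole permuted group.  */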
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr_info),
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt_vinfo))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ???  Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}
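
/* Example (for illustration only): a caller needing an invariant vector of
   zeros ahead of the loop can use
     tree vec_zero = vect_init_vector (stmt_info,
                                       build_zero_cst (scalar_type),
                                       vectype, NULL);
   the scalar value is splat via build_vector_from_val and the init stmt is
   emitted through vect_init_vector_1 above.  */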
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
   with type DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
                                enum vect_def_type dt)
{
  tree vec_oprnd;
  stmt_vec_info vec_stmt_info;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* Operand is defined by a loop header phi.  In case of nested
       cycles we also may have uses of the backedge def.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
                  || dt == vect_nested_cycle);
      /* Fallthru.  */

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt_info
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt_info = (STMT_VINFO_VEC_STMT
                           (STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt_info);
        if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
          vec_oprnd = PHI_RESULT (phi);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a (vector) def
   that will be used in the vectorized stmt for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_def_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
  if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);

  return vec_stmt_info;
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
  gsi_replace (&gsi, vec_stmt, true);

  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}
static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                          vec_load_store_type vls_type, int group_size,
                          vect_memory_access_type memory_access_type,
                          gather_scatter_info *gs_info)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
          ? !vect_load_lanes_supported (vectype, group_size, true)
          : !vect_store_lanes_supported (vectype, group_size, true))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      tree offset_type = TREE_TYPE (gs_info->offset);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                   gs_info->memory_type,
                                                   TYPE_SIGN (offset_type),
                                                   gs_info->scale))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because an access"
                         " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!(targetm.vectorize.get_mask_mode
        (GET_MODE_NUNITS (vecmode),
         GET_MODE_SIZE (vecmode)).exists (&mask_mode))
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because the target"
                         " doesn't have the appropriate masked load or"
                         " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
  else
    gcc_unreachable ();
}
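/* Worked example (illustrative only): with GROUP_SIZE = 3, a vectorization
   factor VF = 4 and NUNITS = 8 elements per vector, each vector iteration
   covers 3 * 4 = 12 scalar elements, so can_div_away_from_zero_p (12, 8,
   &nvectors) sets nvectors = 2 and two masks of VECTYPE are recorded for
   the fully-masked loop.  */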
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
                         gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
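/* Illustrative sketch (not part of the vectorizer): for a conditional
   store in a fully-masked loop the returned mask is the AND of the
   statement's own mask and the loop mask, so the generated GIMPLE is
   roughly

       vec_mask_and_42 = vec_mask_23 & loop_mask_17;
       .MASK_STORE (ptr, align, vec_mask_and_42, value);

   where the SSA names are made-up placeholders.  */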
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
                                     loop_vec_info loop_vinfo, bool masked_p,
                                     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
        continue;

      /* See whether we can calculate (COUNT - 1) * STEP / SCALE
         in OFFSET_BITS bits.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
        continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      if (wi::min_precision (range, sign) > element_bits)
        {
          overflow = wi::OVF_UNKNOWN;
          continue;
        }

      /* See whether the target supports the operation.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
                                     memory_type, element_bits, sign, scale,
                                     &gs_info->ifn, &gs_info->element_type))
        continue;

      tree offset_type = build_nonstandard_integer_type (element_bits,
                                                         sign == UNSIGNED);

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->offset_vectype = NULL_TREE;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "truncating gather/scatter offset to %d bits"
                     " might change its value.\n", element_bits);

  return false;
}
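/* Worked example (illustrative only): with 32-bit elements
   (ELEMENT_BITS = 32), DR_STEP = 48 and COUNT = 255, trying SCALE = 4
   gives FACTOR = 12 and a range of 255 * 12 = 3060, which needs far
   fewer than 32 bits, so the offset vector { 0, 12, 24, ... } fits in
   the 32-bit offset type without losing precision.  A very large or
   variable step would instead fail the wi::min_precision check.  */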
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
                                    loop_vec_info loop_vinfo, bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);

  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int offset_bits = TYPE_PRECISION (offset_type);

  /* Enforced by vect_check_gather_scatter.  */
  gcc_assert (element_bits >= offset_bits);

  /* If the elements are wider than the offset, convert the offset to the
     same width, without changing its sign.  */
  if (element_bits > offset_bits)
    {
      bool unsigned_p = TYPE_UNSIGNED (offset_type);
      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
      gs_info->offset = fold_convert (offset_type, gs_info->offset);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
                               size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
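/* Example (illustrative only): for an 8-element vector the builder above
   encodes the stepped pattern { 7, 6, 5 }, which vec_perm_indices expands
   to the full reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }.  The same
   three-element encoding also describes the reversal of variable-length
   vectors.  */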
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
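/* Example (illustrative only): for a plain store "MEM[p] = x_3;" the
   stored value is the assignment's single RHS operand x_3, while for a
   masked store such as ".MASK_STORE (p, align, mask_5, x_3);" the value
   is the call argument selected by internal_fn_stored_value_index.  */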
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                           bool masked_p, vec_load_store_type vls_type,
                           vect_memory_access_type *memory_access_type,
                           gather_scatter_info *gs_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
  bool single_element_p = (stmt_info == first_stmt_info
                           && !DR_GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
                        && vls_type == VLS_LOAD
                        && loop_vinfo
                        && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (first_stmt_info))
        {
          /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
             separated by the stride, until we have a complete vector.
             Fall back to scalar accesses if that isn't possible.  */
          if (multiple_p (nunits, group_size))
            *memory_access_type = VMAT_STRIDED_SLP;
          else
            *memory_access_type = VMAT_ELEMENTWISE;
        }
      else
        {
          overrun_p = loop_vinfo && gap != 0;
          if (overrun_p && vls_type != VLS_LOAD)
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Grouped store with gaps requires"
                               " non-consecutive accesses\n");
              return false;
            }
          /* An overrun is fine if the trailing elements are smaller
             than the alignment boundary B.  Every vector access will
             be a multiple of B and so we are guaranteed to access a
             non-gap element in the same B-sized block.  */
          if (overrun_p
              && gap < (vect_known_alignment_in_bytes (first_dr_info)
                        / vect_get_scalar_dr_size (first_dr_info)))
            overrun_p = false;

          /* If the gap splits the vector in half and the target
             can do half-vector operations avoid the epilogue peeling
             by simply loading half of the vector only.  Usually
             the construction with an upper zero half will be elided.  */
          dr_alignment_support alignment_support_scheme;
          scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
          machine_mode vmode;
          if (overrun_p
              && !masked_p
              && (((alignment_support_scheme
                      = vect_supportable_dr_alignment (first_dr_info, false)))
                   == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported)
              && known_eq (nunits, (group_size - gap) * 2)
              && known_eq (nunits, group_size)
              && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
              && VECTOR_MODE_P (vmode)
              && targetm.vector_mode_supported_p (vmode)
              && (convert_optab_handler (vec_init_optab,
                                         TYPE_MODE (vectype), vmode)
                  != CODE_FOR_nothing))
            overrun_p = false;

          if (overrun_p && !can_overrun_p)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
          *memory_access_type = VMAT_CONTIGUOUS;
        }
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
         but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
         alignment boundary B.  Every vector access will be a multiple of B
         and so we are guaranteed to access a non-gap element in the
         same B-sized block.  */
      if (would_overrun_p
          && !masked_p
          && gap < (vect_known_alignment_in_bytes (first_dr_info)
                    / vect_get_scalar_dr_size (first_dr_info)))
        would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (first_stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (stmt_info) > 0)
        {
          /* First cope with the degenerate case of a single-element
             vector.  */
          if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
            *memory_access_type = VMAT_CONTIGUOUS;

          /* Otherwise try using LOAD/STORE_LANES.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_load_lanes_supported (vectype, group_size, masked_p)
                  : vect_store_lanes_supported (vectype, group_size,
                                                masked_p)))
            {
              *memory_access_type = VMAT_LOAD_STORE_LANES;
              overrun_p = would_overrun_p;
            }

          /* If that fails, try using permuting loads.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_grouped_load_supported (vectype, single_element_p,
                                                 group_size)
                  : vect_grouped_store_supported (vectype, group_size)))
            {
              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
              overrun_p = would_overrun_p;
            }
        }

      /* As a last resort, trying using a gather load or scatter store.

         ??? Although the code can handle all group sizes correctly,
         it probably isn't a win to use separate strided accesses based
         on nearby locations.  Or, even if it's a win over scalar code,
         it might not be a win over vectorizing at a lower VF, if that
         allows us to use contiguous accesses.  */
      if (*memory_access_type == VMAT_ELEMENTWISE
          && single_element_p
          && loop_vinfo
          && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
                                                 masked_p, gs_info))
        *memory_access_type = VMAT_GATHER_SCATTER;
    }

  if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
    {
      /* STMT is the leader of the group.  Check the operands of all the
         stmts of the group.  */
      stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt_info)
        {
          tree op = vect_get_store_rhs (next_stmt_info);
          enum vect_def_type dt;
          if (!vect_is_simple_use (op, vinfo, &dt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "use not simple.\n");
              return false;
            }
          next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
        }
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Data access with gaps requires scalar "
                         "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
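/* Worked example (illustrative only): a load group of GROUP_SIZE = 3 with
   GAP = 1 (elements a[4*i], a[4*i+1], a[4*i+2] of a four-element record)
   and NUNITS = 4 reads one element past the last group member in every
   vector iteration.  That overrun is acceptable for unmasked loads when a
   scalar epilogue is kept (LOOP_VINFO_PEELING_FOR_GAPS) or when the
   trailing element provably stays inside the same aligned block.  */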
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
                              vec_load_store_type vls_type,
                              unsigned int ncopies)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "negative step with invariant source;"
                         " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
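/* Example (illustrative only): a load "... = a[n - i]" with a step of
   -4 bytes and 4-element vectors is implemented as a contiguous load of
   a[n-i-3..n-i] followed by the reversing permute from
   perm_mask_for_reverse, i.e. VMAT_CONTIGUOUS_REVERSE; if the target
   cannot reverse the vector we fall back to VMAT_ELEMENTWISE.  */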
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
                     bool masked_p, vec_load_store_type vls_type,
                     unsigned int ncopies,
                     vect_memory_access_type *memory_access_type,
                     gather_scatter_info *gs_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
        gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo,
                                    &gs_info->offset_dt,
                                    &gs_info->offset_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "%s index use not simple.\n",
                             vls_type == VLS_LOAD ? "gather" : "scatter");
          return false;
        }
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
                                      vls_type, memory_access_type, gs_info))
        return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      if (loop_vinfo
          && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
                                                 masked_p, gs_info))
        *memory_access_type = VMAT_GATHER_SCATTER;
      else
        *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt_info);
      if (cmp < 0)
        *memory_access_type = get_negative_load_store_type
          (stmt_info, vectype, vls_type, ncopies);
      else if (cmp == 0)
        {
          gcc_assert (vls_type == VLS_LOAD);
          *memory_access_type = VMAT_INVARIANT;
        }
      else
        *memory_access_type = VMAT_CONTIGUOUS;
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Not using elementwise accesses due to variable "
                         "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  if (!first_stmt_info)
    first_stmt_info = stmt_info;
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (first_stmt_info)
      && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
           && !DR_GROUP_NEXT_ELEMENT (stmt_info)
           && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not falling back to elementwise accesses\n");
      return false;
    }

  return true;
}
/* Return true if boolean argument MASK is suitable for vectorizing
   conditional load or store STMT_INFO.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

static bool
vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
                            vect_def_type *mask_dt_out,
                            tree *mask_vectype_out)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask argument is not an SSA name.\n");
      return false;
    }

  enum vect_def_type mask_dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
                TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "vector mask type %T"
                         " does not match vector data type %T.\n",
                         mask_vectype, vectype);

      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  return true;
}
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
                      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
                      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot encode constant as a byte sequence.\n");
      return false;
    }

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
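/* Illustrative sketch (not part of the vectorizer): conceptually the
   floating-point "all ones" mask built above is

       long tmp[6] = { -1, -1, -1, -1, -1, -1 };
       REAL_VALUE_TYPE r;
       real_from_target (&r, tmp, TYPE_MODE (float_type_node));

   i.e. a float whose target representation has every bit set, broadcast
   to the mask vector type; the floats are only ever inspected as a
   bitmask by the gather instruction.  */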
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt_info, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */

static void
vect_build_gather_load_calls (stmt_vec_info stmt_info,
                              gimple_stmt_iterator *gsi,
                              stmt_vec_info *vec_stmt,
                              gather_scatter_info *gs_info,
                              tree mask)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  tree real_masktype = masktype;
  gcc_checking_assert (types_compatible_p (srctype, rettype)
                       && (!mask
                           || TREE_CODE (masktype) == INTEGER_TYPE
                           || types_compatible_p (srctype, masktype)));
  if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
    masktype = build_same_sized_truth_vector_type (srctype);

  tree mask_halftype = masktype;
  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
2757 if (known_eq (nunits
, gather_off_nunits
))
2759 else if (known_eq (nunits
* 2, gather_off_nunits
))
2763 /* Currently widening gathers and scatters are only supported for
2764 fixed-length vectors. */
2765 int count
= gather_off_nunits
.to_constant ();
2766 vec_perm_builder
sel (count
, count
, 1);
2767 for (int i
= 0; i
< count
; ++i
)
2768 sel
.quick_push (i
| (count
/ 2));
2770 vec_perm_indices
indices (sel
, 1, count
);
2771 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2774 else if (known_eq (nunits
, gather_off_nunits
* 2))
2778 /* Currently narrowing gathers and scatters are only supported for
2779 fixed-length vectors. */
2780 int count
= nunits
.to_constant ();
2781 vec_perm_builder
sel (count
, count
, 1);
2782 sel
.quick_grow (count
);
2783 for (int i
= 0; i
< count
; ++i
)
2784 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2785 vec_perm_indices
indices (sel
, 2, count
);
2786 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2790 if (mask
&& masktype
== real_masktype
)
2792 for (int i
= 0; i
< count
; ++i
)
2793 sel
[i
] = i
| (count
/ 2);
2794 indices
.new_vector (sel
, 2, count
);
2795 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2799 = build_same_sized_truth_vector_type (gs_info
->offset_vectype
);
2804 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2805 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2807 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2808 if (!is_gimple_min_invariant (ptr
))
2811 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2812 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2813 gcc_assert (!new_bb
);
2816 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2818 tree vec_oprnd0
= NULL_TREE
;
2819 tree vec_mask
= NULL_TREE
;
2820 tree src_op
= NULL_TREE
;
2821 tree mask_op
= NULL_TREE
;
2822 tree prev_res
= NULL_TREE
;
2823 stmt_vec_info prev_stmt_info
= NULL
;
2827 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2828 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
2831 for (int j
= 0; j
< ncopies
; ++j
)
2834 if (modifier
== WIDEN
&& (j
& 1))
2835 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2836 perm_mask
, stmt_info
, gsi
);
2839 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2841 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2844 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2846 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2847 TYPE_VECTOR_SUBPARTS (idxtype
)));
2848 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2849 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2850 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2851 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2857 if (mask_perm_mask
&& (j
& 1))
2858 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2859 mask_perm_mask
, stmt_info
, gsi
);
2863 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2864 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2865 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2869 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2871 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2872 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2873 gcc_assert (known_eq (sub1
, sub2
));
2874 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2875 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2877 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2878 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2882 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2884 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2886 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2887 : VEC_UNPACK_LO_EXPR
,
2889 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2895 tree mask_arg
= mask_op
;
2896 if (masktype
!= real_masktype
)
2898 tree utype
, optype
= TREE_TYPE (mask_op
);
2899 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2900 utype
= real_masktype
;
2902 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2903 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2904 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2906 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2907 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2909 if (!useless_type_conversion_p (real_masktype
, utype
))
2911 gcc_assert (TYPE_PRECISION (utype
)
2912 <= TYPE_PRECISION (real_masktype
));
2913 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2914 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2915 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2918 src_op
= build_zero_cst (srctype
);
2920 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2923 stmt_vec_info new_stmt_info
;
2924 if (!useless_type_conversion_p (vectype
, rettype
))
2926 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2927 TYPE_VECTOR_SUBPARTS (rettype
)));
2928 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2929 gimple_call_set_lhs (new_call
, op
);
2930 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2931 var
= make_ssa_name (vec_dest
);
2932 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2933 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2935 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2939 var
= make_ssa_name (vec_dest
, new_call
);
2940 gimple_call_set_lhs (new_call
, var
);
2942 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2945 if (modifier
== NARROW
)
2952 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2954 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2957 if (prev_stmt_info
== NULL
)
2958 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2960 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2961 prev_stmt_info
= new_stmt_info
;
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
                             gather_scatter_info *gs_info,
                             tree *dataref_ptr, tree *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
                                              offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
                                 loop_vec_info loop_vinfo,
                                 gather_scatter_info *gs_info,
                                 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple_seq stmts;

  tree bump = size_binop (MULT_EXPR,
                          fold_convert (sizetype, DR_STEP (dr)),
                          size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  offset_type = TREE_TYPE (offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
                          ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);

  /* Create {0, X, X*2, X*3, ...}.  */
  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
                              build_zero_cst (offset_type), step);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
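/* Worked example (illustrative only): for a strided access with
   DR_STEP = 8 bytes, SCALE = 4 and a 4-element offset vector,
   X = 8 / 4 = 2 and the VEC_SERIES_EXPR above materialises
   { 0, 2, 4, 6 }, while *DATAREF_BUMP becomes 8 * 4 = 32 bytes per copy
   of the vectorized statement.  */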
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
                             vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */

static bool
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    stmt_vec_info *vec_stmt, slp_tree slp_node,
                    tree vectype_in, stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  gcall *stmt = as_a <gcall *> (stmt_info->stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  unsigned ncopies;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
  unsigned word_bytes;
  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
    return false;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      if (! slp_node)
        {
          record_stmt_cost (cost_vec,
                            1, vector_stmt, stmt_info, 0, vect_prologue);
          record_stmt_cost (cost_vec,
                            ncopies, vec_perm, stmt_info, 0, vect_body);
        }
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  stmt_vec_info new_stmt_info = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          gimple *new_stmt;
          tree tem = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       char_vectype, vop));
          vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          tree tem2 = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
                                          tem, tem, bswap_vconst);
          vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          tem = make_ssa_name (vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       vectype, tem2));
          new_stmt_info
            = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds.release ();
  return true;
}
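/* Worked example (illustrative only): vectorizing __builtin_bswap32 on a
   V4SI vector reinterprets it as V16QI and applies the byte permutation

       { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   built by the stepped encoding above (word_bytes = 4), then converts
   the permuted bytes back to V4SI.  */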
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
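/* Example (illustrative only): packing results of element type SI into an
   output vector of element type HI (say two V4SI inputs and a V8HI
   output) is a single-step narrowing on targets that provide a
   VEC_PACK_TRUNC pattern for those modes; supportable_narrowing_operation
   then reports multi_step_cvt == 0 and the returned code is used to
   combine two half-width call results.  */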
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   stmt_vec_info *vec_stmt, slp_tree slp_node,
                   stmt_vector_for_cost *cost_vec)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  enum vect_def_type dt[4]
    = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
        vect_unknown_def_type };
  tree vectypes[ARRAY_SIZE (dt)] = {};
  int ndts = ARRAY_SIZE (dt);
  int ncopies, j;
  auto_vec<tree, 8> vargs;
  auto_vec<tree, 8> orig_vargs;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;
3231 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3234 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3238 /* Is STMT_INFO a vectorizable call? */
3239 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3243 if (gimple_call_internal_p (stmt
)
3244 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3245 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3246 /* Handled by vectorizable_load and vectorizable_store. */
3249 if (gimple_call_lhs (stmt
) == NULL_TREE
3250 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3253 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3255 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3257 /* Process function arguments. */
3258 rhs_type
= NULL_TREE
;
3259 vectype_in
= NULL_TREE
;
3260 nargs
= gimple_call_num_args (stmt
);
3262 /* Bail out if the function has more than three arguments, we do not have
3263 interesting builtin functions to vectorize with more than two arguments
3264 except for fma. No arguments is also not good. */
3265 if (nargs
== 0 || nargs
> 4)
3268 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3269 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3270 if (cfn
== CFN_GOMP_SIMD_LANE
)
3273 rhs_type
= unsigned_type_node
;
3277 if (internal_fn_p (cfn
))
3278 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3280 for (i
= 0; i
< nargs
; i
++)
3282 op
= gimple_call_arg (stmt
, i
);
3283 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "use not simple.\n");
3291 /* Skip the mask argument to an internal function. This operand
3292 has been converted via a pattern if necessary. */
3293 if ((int) i
== mask_opno
)
3296 /* We can only handle calls with arguments of the same type. */
3298 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3300 if (dump_enabled_p ())
3301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3302 "argument types differ.\n");
3306 rhs_type
= TREE_TYPE (op
);
3309 vectype_in
= vectypes
[i
];
3310 else if (vectypes
[i
]
3311 && vectypes
[i
] != vectype_in
)
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3315 "argument vector types differ.\n");
3319 /* If all arguments are external or constant defs use a vector type with
3320 the same size as the output vector type. */
3322 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3324 gcc_assert (vectype_in
);
3327 if (dump_enabled_p ())
3328 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3329 "no vectype for scalar type %T\n", rhs_type
);
3335 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3336 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3337 if (known_eq (nunits_in
* 2, nunits_out
))
3339 else if (known_eq (nunits_out
, nunits_in
))
3341 else if (known_eq (nunits_out
* 2, nunits_in
))
3346 /* We only handle functions that do not read or clobber memory. */
3347 if (gimple_vuse (stmt
))
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3351 "function reads from or writes to memory.\n");
3355 /* For now, we only vectorize functions if a target specific builtin
3356 is available. TODO -- in some cases, it might be profitable to
3357 insert the calls for pieces of the vector, in order to be able
3358 to vectorize other operations in the loop. */
3360 internal_fn ifn
= IFN_LAST
;
3361 tree callee
= gimple_call_fndecl (stmt
);
3363 /* First try using an internal function. */
3364 tree_code convert_code
= ERROR_MARK
;
3366 && (modifier
== NONE
3367 || (modifier
== NARROW
3368 && simple_integer_narrowing (vectype_out
, vectype_in
,
3370 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3373 /* If that fails, try asking for a target-specific built-in function. */
3374 if (ifn
== IFN_LAST
)
3376 if (cfn
!= CFN_LAST
)
3377 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3378 (cfn
, vectype_out
, vectype_in
);
3380 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3381 (callee
, vectype_out
, vectype_in
);
3384 if (ifn
== IFN_LAST
&& !fndecl
)
3386 if (cfn
== CFN_GOMP_SIMD_LANE
3389 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3390 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3391 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3392 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3394 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3395 { 0, 1, 2, ... vf - 1 } vector. */
3396 gcc_assert (nargs
== 0);
3398 else if (modifier
== NONE
3399 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3400 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3401 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3402 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3403 vectype_in
, cost_vec
);
3406 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3408 "function is not vectorizable.\n");
3415 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3416 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3418 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3420 /* Sanity check: make sure that at least one copy of the vectorized stmt
3421 needs to be generated. */
3422 gcc_assert (ncopies
>= 1);
3424 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3425 if (!vec_stmt
) /* transformation not required. */
3427 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3428 DUMP_VECT_SCOPE ("vectorizable_call");
3429 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3430 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3431 record_stmt_cost (cost_vec
, ncopies
/ 2,
3432 vec_promote_demote
, stmt_info
, 0, vect_body
);
3434 if (loop_vinfo
&& mask_opno
>= 0)
3436 unsigned int nvectors
= (slp_node
3437 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3439 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
);
3446 if (dump_enabled_p ())
3447 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3450 scalar_dest
= gimple_call_lhs (stmt
);
3451 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3453 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3455 stmt_vec_info new_stmt_info
= NULL
;
3456 prev_stmt_info
= NULL
;
3457 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3459 tree prev_res
= NULL_TREE
;
3460 vargs
.safe_grow (nargs
);
3461 orig_vargs
.safe_grow (nargs
);
3462 for (j
= 0; j
< ncopies
; ++j
)
3464 /* Build argument list for the vectorized call. */
3467 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3468 vec
<tree
> vec_oprnds0
;
3470 for (i
= 0; i
< nargs
; i
++)
3471 vargs
[i
] = gimple_call_arg (stmt
, i
);
3472 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3473 vec_oprnds0
= vec_defs
[0];
3475 /* Arguments are ready. Create the new vector stmt. */
3476 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3479 for (k
= 0; k
< nargs
; k
++)
3481 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3482 vargs
[k
] = vec_oprndsk
[i
];
3484 if (modifier
== NARROW
)
3486 /* We don't define any narrowing conditional functions
3488 gcc_assert (mask_opno
< 0);
3489 tree half_res
= make_ssa_name (vectype_in
);
3491 = gimple_build_call_internal_vec (ifn
, vargs
);
3492 gimple_call_set_lhs (call
, half_res
);
3493 gimple_call_set_nothrow (call
, true);
3494 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3497 prev_res
= half_res
;
3500 new_temp
= make_ssa_name (vec_dest
);
3502 = gimple_build_assign (new_temp
, convert_code
,
3503 prev_res
, half_res
);
3505 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3510 if (mask_opno
>= 0 && masked_loop_p
)
3512 unsigned int vec_num
= vec_oprnds0
.length ();
3513 /* Always true for SLP. */
3514 gcc_assert (ncopies
== 1);
3515 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3517 vargs
[mask_opno
] = prepare_load_store_mask
3518 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3522 if (ifn
!= IFN_LAST
)
3523 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3525 call
= gimple_build_call_vec (fndecl
, vargs
);
3526 new_temp
= make_ssa_name (vec_dest
, call
);
3527 gimple_call_set_lhs (call
, new_temp
);
3528 gimple_call_set_nothrow (call
, true);
3530 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3532 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3535 for (i
= 0; i
< nargs
; i
++)
3537 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3538 vec_oprndsi
.release ();
3543 if (mask_opno
>= 0 && !vectypes
[mask_opno
])
3545 gcc_assert (modifier
!= WIDEN
);
3547 = build_same_sized_truth_vector_type (vectype_in
);
3550 for (i
= 0; i
< nargs
; i
++)
3552 op
= gimple_call_arg (stmt
, i
);
3555 = vect_get_vec_def_for_operand (op
, stmt_info
, vectypes
[i
]);
3558 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3560 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3563 if (mask_opno
>= 0 && masked_loop_p
)
3565 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3568 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3569 vargs
[mask_opno
], gsi
);
3572 if (cfn
== CFN_GOMP_SIMD_LANE
)
3574 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3576 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3577 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3578 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3579 new_temp
= make_ssa_name (vec_dest
);
3580 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3582 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3584 else if (modifier
== NARROW
)
3586 /* We don't define any narrowing conditional functions at
3588 gcc_assert (mask_opno
< 0);
3589 tree half_res
= make_ssa_name (vectype_in
);
3590 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3591 gimple_call_set_lhs (call
, half_res
);
3592 gimple_call_set_nothrow (call
, true);
3593 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3596 prev_res
= half_res
;
3599 new_temp
= make_ssa_name (vec_dest
);
3600 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3601 prev_res
, half_res
);
3603 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3608 if (ifn
!= IFN_LAST
)
3609 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3611 call
= gimple_build_call_vec (fndecl
, vargs
);
3612 new_temp
= make_ssa_name (vec_dest
, call
);
3613 gimple_call_set_lhs (call
, new_temp
);
3614 gimple_call_set_nothrow (call
, true);
3616 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3619 if (j
== (modifier
== NARROW
? 1 : 0))
3620 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3622 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3624 prev_stmt_info
= new_stmt_info
;
3627 else if (modifier
== NARROW
)
3629 /* We don't define any narrowing conditional functions at present. */
3630 gcc_assert (mask_opno
< 0);
3631 for (j
= 0; j
< ncopies
; ++j
)
3633 /* Build argument list for the vectorized call. */
3635 vargs
.create (nargs
* 2);
3641 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3642 vec
<tree
> vec_oprnds0
;
3644 for (i
= 0; i
< nargs
; i
++)
3645 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3646 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3647 vec_oprnds0
= vec_defs
[0];
3649 /* Arguments are ready. Create the new vector stmt. */
3650 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3654 for (k
= 0; k
< nargs
; k
++)
3656 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3657 vargs
.quick_push (vec_oprndsk
[i
]);
3658 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3661 if (ifn
!= IFN_LAST
)
3662 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3664 call
= gimple_build_call_vec (fndecl
, vargs
);
3665 new_temp
= make_ssa_name (vec_dest
, call
);
3666 gimple_call_set_lhs (call
, new_temp
);
3667 gimple_call_set_nothrow (call
, true);
3669 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3670 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3673 for (i
= 0; i
< nargs
; i
++)
3675 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3676 vec_oprndsi
.release ();
3681 for (i
= 0; i
< nargs
; i
++)
3683 op
= gimple_call_arg (stmt
, i
);
3687 = vect_get_vec_def_for_operand (op
, stmt_info
,
3690 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3694 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3697 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3699 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3702 vargs
.quick_push (vec_oprnd0
);
3703 vargs
.quick_push (vec_oprnd1
);
3706 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3707 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3708 gimple_call_set_lhs (new_stmt
, new_temp
);
3710 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3713 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3715 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3717 prev_stmt_info
= new_stmt_info
;
3720 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3723 /* No current target implements this case. */
3728 /* The call in STMT might prevent it from being removed in dce.
3729 We however cannot remove it here, due to the way the ssa name
3730 it defines is mapped to the new definition. So just replace
3731 rhs of the statement with something harmless. */
3736 stmt_info
= vect_orig_stmt (stmt_info
);
3737 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3740 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3741 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            v = t;
            continue;
          default:
            return;
          }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
      else
        return;
    }
}
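/* Illustrative user-level sketch (not part of this file; the function names
   are hypothetical): the source pattern the helper above recognizes.  Inside
   a "#pragma omp simd" body, the address &a[i] is an invariant base plus
   lane * constant step, so instead of passing a vector of pointers to the
   clone we can record the base and the per-lane linear step.  */
#pragma omp declare simd linear (p)
extern void consume (int *p);

void
simd_lane_linear_example (int *a, int n)
{
  #pragma omp simd
  for (int i = 0; i < n; i++)
    consume (&a[i]);   /* &a[i] == a + i * sizeof (int): linear in the lane.  */
}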
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
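/* Minimal standalone sketch (not the GCC API): because the subparts count
   above is a compile-time constant, the clone-selection arithmetic below is
   plain integer division.  A clone of simd length SIMDLEN covers a
   vectorization factor VF with VF / SIMDLEN calls per scalar statement.  */
#include <assert.h>

static unsigned
clone_calls_per_copy (unsigned vf, unsigned simdlen)
{
  assert (simdlen != 0 && vf % simdlen == 0);  /* clones wider than VF are rejected */
  return vf / simdlen;                         /* the "ncopies" computed below */
}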
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_simd_clone_call (stmt_vec_info stmt_info,
                              gimple_stmt_iterator *gsi,
                              stmt_vec_info *vec_stmt, slp_tree slp_node,
                              stmt_vector_for_cost *)
{
3852 tree vec_oprnd0
= NULL_TREE
;
3853 stmt_vec_info prev_stmt_info
;
3855 unsigned int nunits
;
3856 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3857 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3858 vec_info
*vinfo
= stmt_info
->vinfo
;
3859 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3860 tree fndecl
, new_temp
;
3862 auto_vec
<simd_call_arg_info
> arginfo
;
3863 vec
<tree
> vargs
= vNULL
;
3865 tree lhs
, rtype
, ratype
;
3866 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3868 /* Is STMT a vectorizable call? */
3869 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3873 fndecl
= gimple_call_fndecl (stmt
);
3874 if (fndecl
== NULL_TREE
)
3877 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3878 if (node
== NULL
|| node
->simd_clones
== NULL
)
3881 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3884 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3888 if (gimple_call_lhs (stmt
)
3889 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3892 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3894 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3896 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3903 /* Process function arguments. */
3904 nargs
= gimple_call_num_args (stmt
);
3906 /* Bail out if the function has zero arguments. */
3910 arginfo
.reserve (nargs
, true);
3912 for (i
= 0; i
< nargs
; i
++)
3914 simd_call_arg_info thisarginfo
;
3917 thisarginfo
.linear_step
= 0;
3918 thisarginfo
.align
= 0;
3919 thisarginfo
.op
= NULL_TREE
;
3920 thisarginfo
.simd_lane_linear
= false;
3922 op
= gimple_call_arg (stmt
, i
);
3923 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3924 &thisarginfo
.vectype
)
3925 || thisarginfo
.dt
== vect_uninitialized_def
)
3927 if (dump_enabled_p ())
3928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3929 "use not simple.\n");
3933 if (thisarginfo
.dt
== vect_constant_def
3934 || thisarginfo
.dt
== vect_external_def
)
3935 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3937 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3939 /* For linear arguments, the analyze phase should have saved
3940 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3941 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3942 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3944 gcc_assert (vec_stmt
);
3945 thisarginfo
.linear_step
3946 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3948 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3949 thisarginfo
.simd_lane_linear
3950 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3951 == boolean_true_node
);
3952 /* If loop has been peeled for alignment, we need to adjust it. */
3953 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3954 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3955 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3957 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3958 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3959 tree opt
= TREE_TYPE (thisarginfo
.op
);
3960 bias
= fold_convert (TREE_TYPE (step
), bias
);
3961 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3963 = fold_build2 (POINTER_TYPE_P (opt
)
3964 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3965 thisarginfo
.op
, bias
);
3969 && thisarginfo
.dt
!= vect_constant_def
3970 && thisarginfo
.dt
!= vect_external_def
3972 && TREE_CODE (op
) == SSA_NAME
3973 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3975 && tree_fits_shwi_p (iv
.step
))
3977 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3978 thisarginfo
.op
= iv
.base
;
3980 else if ((thisarginfo
.dt
== vect_constant_def
3981 || thisarginfo
.dt
== vect_external_def
)
3982 && POINTER_TYPE_P (TREE_TYPE (op
)))
3983 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
3986 if (POINTER_TYPE_P (TREE_TYPE (op
))
3987 && !thisarginfo
.linear_step
3989 && thisarginfo
.dt
!= vect_constant_def
3990 && thisarginfo
.dt
!= vect_external_def
3993 && TREE_CODE (op
) == SSA_NAME
)
3994 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3996 arginfo
.quick_push (thisarginfo
);
3999 unsigned HOST_WIDE_INT vf
;
4000 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
4002 if (dump_enabled_p ())
4003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4004 "not considering SIMD clones; not yet supported"
4005 " for variable-width vectors.\n");
4009 unsigned int badness
= 0;
4010 struct cgraph_node
*bestn
= NULL
;
4011 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4012 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4014 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4015 n
= n
->simdclone
->next_clone
)
4017 unsigned int this_badness
= 0;
4018 if (n
->simdclone
->simdlen
> vf
4019 || n
->simdclone
->nargs
!= nargs
)
4021 if (n
->simdclone
->simdlen
< vf
)
4022 this_badness
+= (exact_log2 (vf
)
4023 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4024 if (n
->simdclone
->inbranch
)
4025 this_badness
+= 2048;
4026 int target_badness
= targetm
.simd_clone
.usable (n
);
4027 if (target_badness
< 0)
4029 this_badness
+= target_badness
* 512;
4030 /* FORNOW: Have to add code to add the mask argument. */
4031 if (n
->simdclone
->inbranch
)
4033 for (i
= 0; i
< nargs
; i
++)
4035 switch (n
->simdclone
->args
[i
].arg_type
)
4037 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4038 if (!useless_type_conversion_p
4039 (n
->simdclone
->args
[i
].orig_type
,
4040 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4042 else if (arginfo
[i
].dt
== vect_constant_def
4043 || arginfo
[i
].dt
== vect_external_def
4044 || arginfo
[i
].linear_step
)
4047 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4048 if (arginfo
[i
].dt
!= vect_constant_def
4049 && arginfo
[i
].dt
!= vect_external_def
)
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4054 if (arginfo
[i
].dt
== vect_constant_def
4055 || arginfo
[i
].dt
== vect_external_def
4056 || (arginfo
[i
].linear_step
4057 != n
->simdclone
->args
[i
].linear_step
))
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4063 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4065 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4069 case SIMD_CLONE_ARG_TYPE_MASK
:
4072 if (i
== (size_t) -1)
4074 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4079 if (arginfo
[i
].align
)
4080 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4081 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4083 if (i
== (size_t) -1)
4085 if (bestn
== NULL
|| this_badness
< badness
)
4088 badness
= this_badness
;
4095 for (i
= 0; i
< nargs
; i
++)
4096 if ((arginfo
[i
].dt
== vect_constant_def
4097 || arginfo
[i
].dt
== vect_external_def
)
4098 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4101 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
4103 if (arginfo
[i
].vectype
== NULL
4104 || (simd_clone_subparts (arginfo
[i
].vectype
)
4105 > bestn
->simdclone
->simdlen
))
4109 fndecl
= bestn
->decl
;
4110 nunits
= bestn
->simdclone
->simdlen
;
4111 ncopies
= vf
/ nunits
;
4113 /* If the function isn't const, only allow it in simd loops where user
4114 has asserted that at least nunits consecutive iterations can be
4115 performed using SIMD instructions. */
4116 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4117 && gimple_vuse (stmt
))
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
4124 if (!vec_stmt
) /* transformation not required. */
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4127 for (i
= 0; i
< nargs
; i
++)
4128 if ((bestn
->simdclone
->args
[i
].arg_type
4129 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4130 || (bestn
->simdclone
->args
[i
].arg_type
4131 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4135 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4136 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4137 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4138 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4139 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4140 tree sll
= arginfo
[i
].simd_lane_linear
4141 ? boolean_true_node
: boolean_false_node
;
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4144 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4145 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4146 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4156 scalar_dest
= gimple_call_lhs (stmt
);
4157 vec_dest
= NULL_TREE
;
4162 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4163 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4164 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4167 rtype
= TREE_TYPE (ratype
);
4171 prev_stmt_info
= NULL
;
4172 for (j
= 0; j
< ncopies
; ++j
)
4174 /* Build argument list for the vectorized call. */
4176 vargs
.create (nargs
);
4180 for (i
= 0; i
< nargs
; i
++)
4182 unsigned int k
, l
, m
, o
;
4184 op
= gimple_call_arg (stmt
, i
);
4185 switch (bestn
->simdclone
->args
[i
].arg_type
)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4188 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4189 o
= nunits
/ simd_clone_subparts (atype
);
4190 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4192 if (simd_clone_subparts (atype
)
4193 < simd_clone_subparts (arginfo
[i
].vectype
))
4195 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4196 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4197 / simd_clone_subparts (atype
));
4198 gcc_assert ((k
& (k
- 1)) == 0);
4201 = vect_get_vec_def_for_operand (op
, stmt_info
);
4204 vec_oprnd0
= arginfo
[i
].op
;
4205 if ((m
& (k
- 1)) == 0)
4207 = vect_get_vec_def_for_stmt_copy (vinfo
,
4210 arginfo
[i
].op
= vec_oprnd0
;
4212 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4214 bitsize_int ((m
& (k
- 1)) * prec
));
4216 = gimple_build_assign (make_ssa_name (atype
),
4218 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4219 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4223 k
= (simd_clone_subparts (atype
)
4224 / simd_clone_subparts (arginfo
[i
].vectype
));
4225 gcc_assert ((k
& (k
- 1)) == 0);
4226 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4228 vec_alloc (ctor_elts
, k
);
4231 for (l
= 0; l
< k
; l
++)
4233 if (m
== 0 && l
== 0)
4235 = vect_get_vec_def_for_operand (op
, stmt_info
);
4238 = vect_get_vec_def_for_stmt_copy (vinfo
,
4240 arginfo
[i
].op
= vec_oprnd0
;
4243 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4247 vargs
.safe_push (vec_oprnd0
);
4250 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4252 = gimple_build_assign (make_ssa_name (atype
),
4254 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4256 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4261 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4262 vargs
.safe_push (op
);
4264 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4265 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4270 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4275 edge pe
= loop_preheader_edge (loop
);
4276 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4277 gcc_assert (!new_bb
);
4279 if (arginfo
[i
].simd_lane_linear
)
4281 vargs
.safe_push (arginfo
[i
].op
);
4284 tree phi_res
= copy_ssa_name (op
);
4285 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4286 loop_vinfo
->add_stmt (new_phi
);
4287 add_phi_arg (new_phi
, arginfo
[i
].op
,
4288 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4290 = POINTER_TYPE_P (TREE_TYPE (op
))
4291 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4292 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4293 ? sizetype
: TREE_TYPE (op
);
4295 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4297 tree tcst
= wide_int_to_tree (type
, cst
);
4298 tree phi_arg
= copy_ssa_name (op
);
4300 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4301 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4302 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4303 loop_vinfo
->add_stmt (new_stmt
);
4304 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4306 arginfo
[i
].op
= phi_res
;
4307 vargs
.safe_push (phi_res
);
4312 = POINTER_TYPE_P (TREE_TYPE (op
))
4313 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4314 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4315 ? sizetype
: TREE_TYPE (op
);
4317 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4319 tree tcst
= wide_int_to_tree (type
, cst
);
4320 new_temp
= make_ssa_name (TREE_TYPE (op
));
4322 = gimple_build_assign (new_temp
, code
,
4323 arginfo
[i
].op
, tcst
);
4324 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4325 vargs
.safe_push (new_temp
);
4328 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4329 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4330 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4331 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4332 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4333 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4339 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4342 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4344 new_temp
= create_tmp_var (ratype
);
4345 else if (simd_clone_subparts (vectype
)
4346 == simd_clone_subparts (rtype
))
4347 new_temp
= make_ssa_name (vec_dest
, new_call
);
4349 new_temp
= make_ssa_name (rtype
, new_call
);
4350 gimple_call_set_lhs (new_call
, new_temp
);
4352 stmt_vec_info new_stmt_info
4353 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4357 if (simd_clone_subparts (vectype
) < nunits
)
4360 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4361 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4362 k
= nunits
/ simd_clone_subparts (vectype
);
4363 gcc_assert ((k
& (k
- 1)) == 0);
4364 for (l
= 0; l
< k
; l
++)
4369 t
= build_fold_addr_expr (new_temp
);
4370 t
= build2 (MEM_REF
, vectype
, t
,
4371 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4374 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4375 bitsize_int (prec
), bitsize_int (l
* prec
));
4377 = gimple_build_assign (make_ssa_name (vectype
), t
);
4379 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4381 if (j
== 0 && l
== 0)
4382 STMT_VINFO_VEC_STMT (stmt_info
)
4383 = *vec_stmt
= new_stmt_info
;
4385 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4387 prev_stmt_info
= new_stmt_info
;
4391 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4394 else if (simd_clone_subparts (vectype
) > nunits
)
4396 unsigned int k
= (simd_clone_subparts (vectype
)
4397 / simd_clone_subparts (rtype
));
4398 gcc_assert ((k
& (k
- 1)) == 0);
4399 if ((j
& (k
- 1)) == 0)
4400 vec_alloc (ret_ctor_elts
, k
);
4403 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4404 for (m
= 0; m
< o
; m
++)
4406 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4407 size_int (m
), NULL_TREE
, NULL_TREE
);
4409 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4411 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4413 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4414 gimple_assign_lhs (new_stmt
));
4416 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4419 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4420 if ((j
& (k
- 1)) != k
- 1)
4422 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4424 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4426 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4428 if ((unsigned) j
== k
- 1)
4429 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4431 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4433 prev_stmt_info
= new_stmt_info
;
4438 tree t
= build_fold_addr_expr (new_temp
);
4439 t
= build2 (MEM_REF
, vectype
, t
,
4440 build_int_cst (TREE_TYPE (t
), 0));
4442 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4444 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4445 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4450 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4452 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4454 prev_stmt_info
= new_stmt_info
;
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  gimple *new_stmt;
  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
  unlink_stmt_vdef (stmt);

  return true;
}
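/* Standalone sketch (hypothetical struct, not the GCC cgraph data): the
   shape of the "badness" heuristic used above to choose among the simd
   clones of a callee.  Smaller is better; clones narrower than the
   vectorization factor and inbranch (masked) clones are penalized, and
   the target hook can add its own penalty or veto a clone entirely.  */
#include <limits.h>

struct clone_props
{
  unsigned simdlen;      /* lanes handled per clone call */
  int inbranch;          /* nonzero if the clone takes a mask argument */
  int target_badness;    /* target penalty, < 0 means unusable */
};

static int
floor_log2u (unsigned x)
{
  int l = -1;
  while (x)
    {
      x >>= 1;
      l++;
    }
  return l;
}

static unsigned
clone_badness (const struct clone_props *c, unsigned vf)
{
  if (c->simdlen > vf || c->target_badness < 0)
    return UINT_MAX;     /* clone cannot be used at all */
  unsigned b = 0;
  if (c->simdlen < vf)
    b += (floor_log2u (vf) - floor_log2u (c->simdlen)) * 1024;
  if (c->inbranch)
    b += 2048;
  b += (unsigned) c->target_badness * 512;
  return b;
}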
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code, tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return new_stmt;
}
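/* Plain-C sketch (scalar model, not GIMPLE): the "two halves" structure a
   widening operation produces.  Eight 16-bit inputs widen into two vectors
   of four 32-bit results, one built by the CODE1 half and one by the CODE2
   half generated above.  */
#include <stdint.h>

static void
widen_halves (const int16_t src[8], int32_t lo[4], int32_t hi[4])
{
  for (int i = 0; i < 4; i++)
    {
      lo[i] = (int32_t) src[i];      /* low-half result */
      hi[i] = (int32_t) src[i + 4];  /* high-half result */
    }
}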
/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
   For the first operand we call vect_get_vec_def_for_operand (with OPRND
   containing scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
                              multi_step_cvt - 1);
}
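/* Minimal model (not the GCC API) of how many vector definitions the
   recursion above collects: two per invocation.  Called with
   vect_pow2 (multi_step_cvt) - 1, it therefore yields 2 * 2^multi_step_cvt
   operands for the demotion code below to combine.  */
static unsigned
loop_based_def_count (int multi_step_cvt)
{
  return multi_step_cvt > 0 ? 2 + loop_based_def_count (multi_step_cvt - 1) : 2;
}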
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt,
                                       stmt_vec_info stmt_info,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      stmt_vec_info new_stmt_info
        = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;

              *prev_stmt_info = new_stmt_info;
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt_info, vec_dsts, gsi,
                                             slp_node, VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
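/* Plain-C sketch (scalar model, not GIMPLE) of one demotion step: adjacent
   pairs of wide vectors are packed into a single narrower vector, halving
   the operand count, which is why the recursion above truncates VEC_OPRNDS
   to (i + 1) / 2 before recursing with VEC_PACK_TRUNC_EXPR.  */
#include <stdint.h>

static void
pack_trunc_pair (const int32_t a[4], const int32_t b[4], int16_t out[8])
{
  for (int i = 0; i < 4; i++)
    {
      out[i] = (int16_t) a[i];      /* truncated first operand */
      out[i + 4] = (int16_t) b[i];  /* truncated second operand */
    }
}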
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        stmt_vec_info stmt_info, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
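/* Minimal model (not the GCC API): each promotion step above replaces every
   operand with its two half-width results, so the operand vector doubles per
   step and one input yields 2^STEPS outputs after STEPS steps.  */
static unsigned
promoted_operand_count (unsigned inputs, unsigned steps)
{
  while (steps--)
    inputs *= 2;   /* each vop0 contributes new_tmp1 and new_tmp2 */
  return inputs;
}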
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         stmt_vec_info *vec_stmt, slp_tree slp_node,
                         stmt_vector_for_cost *cost_vec)
{
4704 tree op0
, op1
= NULL_TREE
;
4705 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4706 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4707 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4708 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4709 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4711 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4713 stmt_vec_info prev_stmt_info
;
4714 poly_uint64 nunits_in
;
4715 poly_uint64 nunits_out
;
4716 tree vectype_out
, vectype_in
;
4718 tree lhs_type
, rhs_type
;
4719 enum { NARROW
, NONE
, WIDEN
} modifier
;
4720 vec
<tree
> vec_oprnds0
= vNULL
;
4721 vec
<tree
> vec_oprnds1
= vNULL
;
4723 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4724 vec_info
*vinfo
= stmt_info
->vinfo
;
4725 int multi_step_cvt
= 0;
4726 vec
<tree
> interm_types
= vNULL
;
4727 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4729 unsigned short fltsz
;
4731 /* Is STMT a vectorizable conversion? */
4733 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4736 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4740 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4744 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4747 code
= gimple_assign_rhs_code (stmt
);
4748 if (!CONVERT_EXPR_CODE_P (code
)
4749 && code
!= FIX_TRUNC_EXPR
4750 && code
!= FLOAT_EXPR
4751 && code
!= WIDEN_MULT_EXPR
4752 && code
!= WIDEN_LSHIFT_EXPR
)
4755 op_type
= TREE_CODE_LENGTH (code
);
4757 /* Check types of lhs and rhs. */
4758 scalar_dest
= gimple_assign_lhs (stmt
);
4759 lhs_type
= TREE_TYPE (scalar_dest
);
4760 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4762 op0
= gimple_assign_rhs1 (stmt
);
4763 rhs_type
= TREE_TYPE (op0
);
4765 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4766 && !((INTEGRAL_TYPE_P (lhs_type
)
4767 && INTEGRAL_TYPE_P (rhs_type
))
4768 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4769 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4772 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4773 && ((INTEGRAL_TYPE_P (lhs_type
)
4774 && !type_has_mode_precision_p (lhs_type
))
4775 || (INTEGRAL_TYPE_P (rhs_type
)
4776 && !type_has_mode_precision_p (rhs_type
))))
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4780 "type conversion to/from bit-precision unsupported."
4785 /* Check the operands of the operation. */
4786 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4790 "use not simple.\n");
4793 if (op_type
== binary_op
)
4797 op1
= gimple_assign_rhs2 (stmt
);
4798 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4799 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4801 if (CONSTANT_CLASS_P (op0
))
4802 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4804 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4808 if (dump_enabled_p ())
4809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4810 "use not simple.\n");
4815 /* If op0 is an external or constant defs use a vector type of
4816 the same size as the output vector type. */
4818 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4820 gcc_assert (vectype_in
);
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4825 "no vectype for scalar type %T\n", rhs_type
);
4830 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4831 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4835 "can't convert between boolean and non "
4836 "boolean vectors %T\n", rhs_type
);
4841 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4842 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4843 if (known_eq (nunits_out
, nunits_in
))
4845 else if (multiple_p (nunits_out
, nunits_in
))
4849 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4853 /* Multiple types in SLP are handled by creating the appropriate number of
4854 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4858 else if (modifier
== NARROW
)
4859 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4861 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
4867 bool found_mode
= false;
4868 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4869 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4870 opt_scalar_mode rhs_mode_iter
;
4872 /* Supportable by target? */
4876 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4878 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4885 "conversion not supported by target.\n");
4889 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4890 vectype_in
, &code1
, &code2
,
4891 &multi_step_cvt
, &interm_types
))
4893 /* Binary widening operation can only be supported directly by the
4895 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4899 if (code
!= FLOAT_EXPR
4900 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4903 fltsz
= GET_MODE_SIZE (lhs_mode
);
4904 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4906 rhs_mode
= rhs_mode_iter
.require ();
4907 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4911 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4912 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4913 if (cvt_type
== NULL_TREE
)
4916 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4918 if (!supportable_convert_operation (code
, vectype_out
,
4919 cvt_type
, &decl1
, &codecvt1
))
4922 else if (!supportable_widening_operation (code
, stmt_info
,
4923 vectype_out
, cvt_type
,
4924 &codecvt1
, &codecvt2
,
4929 gcc_assert (multi_step_cvt
== 0);
4931 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4932 vectype_in
, &code1
, &code2
,
4933 &multi_step_cvt
, &interm_types
))
4943 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4944 codecvt2
= ERROR_MARK
;
4948 interm_types
.safe_push (cvt_type
);
4949 cvt_type
= NULL_TREE
;
4954 gcc_assert (op_type
== unary_op
);
4955 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4956 &code1
, &multi_step_cvt
,
4960 if (code
!= FIX_TRUNC_EXPR
4961 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4965 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4966 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4967 if (cvt_type
== NULL_TREE
)
4969 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4972 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4973 &code1
, &multi_step_cvt
,
4982 if (!vec_stmt
) /* transformation not required. */
4984 DUMP_VECT_SCOPE ("vectorizable_conversion");
4985 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4987 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4988 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4991 else if (modifier
== NARROW
)
4993 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4994 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4999 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5000 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
5003 interm_types
.release ();
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE
, vect_location
,
5010 "transform conversion. ncopies = %d.\n", ncopies
);
5012 if (op_type
== binary_op
)
5014 if (CONSTANT_CLASS_P (op0
))
5015 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5016 else if (CONSTANT_CLASS_P (op1
))
5017 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5020 /* In case of multi-step conversion, we first generate conversion operations
5021 to the intermediate types, and then from that types to the final one.
5022 We create vector destinations for the intermediate type (TYPES) received
5023 from supportable_*_operation, and store them in the correct order
5024 for future use in vect_create_vectorized_*_stmts (). */
5025 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5026 vec_dest
= vect_create_destination_var (scalar_dest
,
5027 (cvt_type
&& modifier
== WIDEN
)
5028 ? cvt_type
: vectype_out
);
5029 vec_dsts
.quick_push (vec_dest
);
5033 for (i
= interm_types
.length () - 1;
5034 interm_types
.iterate (i
, &intermediate_type
); i
--)
5036 vec_dest
= vect_create_destination_var (scalar_dest
,
5038 vec_dsts
.quick_push (vec_dest
);
5043 vec_dest
= vect_create_destination_var (scalar_dest
,
5045 ? vectype_out
: cvt_type
);
5049 if (modifier
== WIDEN
)
5051 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5052 if (op_type
== binary_op
)
5053 vec_oprnds1
.create (1);
5055 else if (modifier
== NARROW
)
5056 vec_oprnds0
.create (
5057 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5059 else if (code
== WIDEN_LSHIFT_EXPR
)
5060 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5063 prev_stmt_info
= NULL
;
5067 for (j
= 0; j
< ncopies
; j
++)
5070 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
5073 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5075 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5077 stmt_vec_info new_stmt_info
;
5078 /* Arguments are ready, create the new vector stmt. */
5079 if (code1
== CALL_EXPR
)
5081 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5083 gimple_call_set_lhs (new_stmt
, new_temp
);
5085 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5089 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5091 = gimple_build_assign (vec_dest
, code1
, vop0
);
5092 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5093 gimple_assign_set_lhs (new_stmt
, new_temp
);
5095 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5099 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5102 if (!prev_stmt_info
)
5103 STMT_VINFO_VEC_STMT (stmt_info
)
5104 = *vec_stmt
= new_stmt_info
;
5106 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5107 prev_stmt_info
= new_stmt_info
;
5114 /* In case the vectorization factor (VF) is bigger than the number
5115 of elements that we can fit in a vectype (nunits), we have to
5116 generate more than one vector stmt - i.e - we need to "unroll"
5117 the vector stmt by a factor VF/nunits. */
5118 for (j
= 0; j
< ncopies
; j
++)
5125 if (code
== WIDEN_LSHIFT_EXPR
)
5130 /* Store vec_oprnd1 for every vector stmt to be created
5131 for SLP_NODE. We check during the analysis that all
5132 the shift arguments are the same. */
5133 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5134 vec_oprnds1
.quick_push (vec_oprnd1
);
5136 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
5137 &vec_oprnds0
, NULL
, slp_node
);
5140 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5141 &vec_oprnds1
, slp_node
);
5145 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5146 vec_oprnds0
.quick_push (vec_oprnd0
);
5147 if (op_type
== binary_op
)
5149 if (code
== WIDEN_LSHIFT_EXPR
)
5153 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5154 vec_oprnds1
.quick_push (vec_oprnd1
);
5160 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5161 vec_oprnds0
.truncate (0);
5162 vec_oprnds0
.quick_push (vec_oprnd0
);
5163 if (op_type
== binary_op
)
5165 if (code
== WIDEN_LSHIFT_EXPR
)
5168 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5170 vec_oprnds1
.truncate (0);
5171 vec_oprnds1
.quick_push (vec_oprnd1
);
5175 /* Arguments are ready. Create the new vector stmts. */
5176 for (i
= multi_step_cvt
; i
>= 0; i
--)
5178 tree this_dest
= vec_dsts
[i
];
5179 enum tree_code c1
= code1
, c2
= code2
;
5180 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5185 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5186 &vec_oprnds1
, stmt_info
,
5188 c1
, c2
, decl1
, decl2
,
5192 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5194 stmt_vec_info new_stmt_info
;
5197 if (codecvt1
== CALL_EXPR
)
5199 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5200 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5201 gimple_call_set_lhs (new_stmt
, new_temp
);
5203 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5208 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5209 new_temp
= make_ssa_name (vec_dest
);
5211 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5213 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5218 new_stmt_info
= vinfo
->lookup_def (vop0
);
5221 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5224 if (!prev_stmt_info
)
5225 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5227 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5228 prev_stmt_info
= new_stmt_info
;
5233 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5237 /* In case the vectorization factor (VF) is bigger than the number
5238 of elements that we can fit in a vectype (nunits), we have to
5239 generate more than one vector stmt - i.e - we need to "unroll"
5240 the vector stmt by a factor VF/nunits. */
5241 for (j
= 0; j
< ncopies
; j
++)
5245 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5249 vec_oprnds0
.truncate (0);
5250 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5251 vect_pow2 (multi_step_cvt
) - 1);
5254 /* Arguments are ready. Create the new vector stmts. */
5256 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5258 if (codecvt1
== CALL_EXPR
)
5260 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5261 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5262 gimple_call_set_lhs (new_stmt
, new_temp
);
5263 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5267 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5268 new_temp
= make_ssa_name (vec_dest
);
5270 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5271 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5274 vec_oprnds0
[i
] = new_temp
;
5277 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5278 stmt_info
, vec_dsts
, gsi
,
5283 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5287 vec_oprnds0
.release ();
5288 vec_oprnds1
.release ();
5289 interm_types
.release ();
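/* Standalone sketch (hypothetical helper, not in GCC): how the NARROW / NONE
   / WIDEN modifier used throughout vectorizable_conversion above follows from
   comparing the element counts of the input and output vector types,
   mirroring the known_eq / multiple_p checks near the top of that function.  */
enum conv_modifier { CONV_NARROW, CONV_NONE, CONV_WIDEN };

static enum conv_modifier
classify_conversion (unsigned nunits_in, unsigned nunits_out)
{
  if (nunits_out == nunits_in)
    return CONV_NONE;      /* same lane count: single convert */
  if (nunits_out % nunits_in == 0)
    return CONV_NARROW;    /* more, smaller output lanes: demotion */
  return CONV_WIDEN;       /* fewer, larger output lanes: promotion */
}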
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         stmt_vec_info *vec_stmt, slp_tree slp_node,
                         stmt_vector_for_cost *cost_vec)
{
5310 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5312 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5316 vec
<tree
> vec_oprnds
= vNULL
;
5318 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5319 vec_info
*vinfo
= stmt_info
->vinfo
;
5320 stmt_vec_info prev_stmt_info
= NULL
;
5321 enum tree_code code
;
5324 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5327 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5331 /* Is vectorizable assignment? */
5332 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5336 scalar_dest
= gimple_assign_lhs (stmt
);
5337 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5340 code
= gimple_assign_rhs_code (stmt
);
5341 if (gimple_assign_single_p (stmt
)
5342 || code
== PAREN_EXPR
5343 || CONVERT_EXPR_CODE_P (code
))
5344 op
= gimple_assign_rhs1 (stmt
);
5348 if (code
== VIEW_CONVERT_EXPR
)
5349 op
= TREE_OPERAND (op
, 0);
5351 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5352 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5354 /* Multiple types in SLP are handled by creating the appropriate number of
5355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5360 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5362 gcc_assert (ncopies
>= 1);
5364 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5366 if (dump_enabled_p ())
5367 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5368 "use not simple.\n");
5372 /* We can handle NOP_EXPR conversions that do not change the number
5373 of elements or the vector size. */
5374 if ((CONVERT_EXPR_CODE_P (code
)
5375 || code
== VIEW_CONVERT_EXPR
)
5377 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5378 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5379 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5382 /* We do not handle bit-precision changes. */
5383 if ((CONVERT_EXPR_CODE_P (code
)
5384 || code
== VIEW_CONVERT_EXPR
)
5385 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5386 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5387 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5388 /* But a conversion that does not change the bit-pattern is ok. */
5389 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5390 > TYPE_PRECISION (TREE_TYPE (op
)))
5391 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5392 /* Conversion between boolean types of different sizes is
5393 a simple assignment in case their vectypes are same
5395 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5396 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5400 "type conversion to/from bit-precision "
5405 if (!vec_stmt
) /* transformation not required. */
5407 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5408 DUMP_VECT_SCOPE ("vectorizable_assignment");
5409 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5414 if (dump_enabled_p ())
5415 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5418 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5421 for (j
= 0; j
< ncopies
; j
++)
5425 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5427 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5429 /* Arguments are ready. create the new vector stmt. */
5430 stmt_vec_info new_stmt_info
= NULL
;
5431 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5433 if (CONVERT_EXPR_CODE_P (code
)
5434 || code
== VIEW_CONVERT_EXPR
)
5435 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5436 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5437 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5438 gimple_assign_set_lhs (new_stmt
, new_temp
);
5440 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5442 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5449 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5451 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5453 prev_stmt_info
= new_stmt_info
;
5456 vec_oprnds
.release ();
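/* Illustrative user-level example (not part of this file): the conversions
   vectorizable_assignment above accepts are those that change neither the
   lane count nor the vector size, such as a signedness-only cast, which
   becomes a plain vector copy via VIEW_CONVERT_EXPR.  */
void
copy_with_cast (const int *a, unsigned int *b, int n)
{
  for (int i = 0; i < n; i++)
    b[i] = (unsigned int) a[i];   /* same width: no lane or size change */
}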
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
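/* Illustrative user-level example (not part of this file): the two shift
   shapes distinguished by the check above and by vectorizable_shift below.
   In shift_by_scalar every lane uses the same invariant count (the
   optab_scalar form); in shift_by_vector each lane has its own count
   (the optab_vector form).  */
void
shift_by_scalar (int *a, int n, int s)
{
  for (int i = 0; i < n; i++)
    a[i] <<= s;      /* invariant count for all lanes */
}

void
shift_by_vector (int *a, const int *s, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= s[i];   /* per-lane count */
}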
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    stmt_vec_info *vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)
{
5511 tree op0
, op1
= NULL
;
5512 tree vec_oprnd1
= NULL_TREE
;
5514 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5515 enum tree_code code
;
5516 machine_mode vec_mode
;
5520 machine_mode optab_op2_mode
;
5521 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5523 stmt_vec_info prev_stmt_info
;
5524 poly_uint64 nunits_in
;
5525 poly_uint64 nunits_out
;
5530 vec
<tree
> vec_oprnds0
= vNULL
;
5531 vec
<tree
> vec_oprnds1
= vNULL
;
5534 bool scalar_shift_arg
= true;
5535 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5536 vec_info
*vinfo
= stmt_info
->vinfo
;
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5541 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5542 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5546 /* Is STMT a vectorizable binary/unary operation? */
5547 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5551 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5554 code
= gimple_assign_rhs_code (stmt
);
5556 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5557 || code
== RROTATE_EXPR
))
5560 scalar_dest
= gimple_assign_lhs (stmt
);
5561 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5562 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5566 "bit-precision shifts not supported.\n");
5570 op0
= gimple_assign_rhs1 (stmt
);
5571 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5575 "use not simple.\n");
5578 /* If op0 is an external or constant def use a vector type with
5579 the same size as the output vector type. */
5581 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5583 gcc_assert (vectype
);
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5588 "no vectype for scalar type\n");
5592 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5593 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5594 if (maybe_ne (nunits_out
, nunits_in
))
5597 op1
= gimple_assign_rhs2 (stmt
);
5598 stmt_vec_info op1_def_stmt_info
;
5599 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5600 &op1_def_stmt_info
))
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5604 "use not simple.\n");
5608 /* Multiple types in SLP are handled by creating the appropriate number of
5609 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5614 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5616 gcc_assert (ncopies
>= 1);
5618 /* Determine whether the shift amount is a vector, or scalar. If the
5619 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5621 if ((dt
[1] == vect_internal_def
5622 || dt
[1] == vect_induction_def
5623 || dt
[1] == vect_nested_cycle
)
5625 scalar_shift_arg
= false;
5626 else if (dt
[1] == vect_constant_def
5627 || dt
[1] == vect_external_def
5628 || dt
[1] == vect_internal_def
)
5630 /* In SLP, need to check whether the shift count is the same,
5631 in loops if it is a constant or invariant, it is always
5635 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5636 stmt_vec_info slpstmt_info
;
5638 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5640 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5641 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5642 scalar_shift_arg
= false;
5645 /* For internal SLP defs we have to make sure we see scalar stmts
5646 for all vector elements.
5647 ??? For different vectors we could resort to a different
5648 scalar shift operand but code-generation below simply always
5650 if (dt
[1] == vect_internal_def
5651 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5653 scalar_shift_arg
= false;
5656 /* If the shift amount is computed by a pattern stmt we cannot
5657 use the scalar amount directly thus give up and use a vector
5659 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5660 scalar_shift_arg
= false;
5664 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5666 "operand mode requires invariant argument.\n");
5670 /* Vector shifted by vector. */
5671 if (!scalar_shift_arg
)
5673 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_NOTE
, vect_location
,
5676 "vector/vector shift/rotate found.\n");
5679 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5680 if (op1_vectype
== NULL_TREE
5681 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5685 "unusable type for last operand in"
5686 " vector/vector shift/rotate.\n");
5690 /* See if the machine has a vector shifted by scalar insn and if not
5691 then see if it has a vector shifted by vector insn. */
5694 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5696 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_NOTE
, vect_location
,
5700 "vector/scalar shift/rotate found.\n");
5704 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5706 && (optab_handler (optab
, TYPE_MODE (vectype
))
5707 != CODE_FOR_nothing
))
5709 scalar_shift_arg
= false;
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_NOTE
, vect_location
,
5713 "vector/vector shift/rotate found.\n");
5715 /* Unlike the other binary operators, shifts/rotates have
5716 the rhs being int, instead of the same type as the lhs,
5717 so make sure the scalar is the right type if we are
5718 dealing with vectors of long long/long/short/char. */
5719 if (dt
[1] == vect_constant_def
)
5720 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5721 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5725 && TYPE_MODE (TREE_TYPE (vectype
))
5726 != TYPE_MODE (TREE_TYPE (op1
)))
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5730 "unusable type for last operand in"
5731 " vector/vector shift/rotate.\n");
5734 if (vec_stmt
&& !slp_node
)
5736 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5737 op1
= vect_init_vector (stmt_info
, op1
,
5738 TREE_TYPE (vectype
), NULL
);
5745 /* Supportable by target? */
5748 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5753 vec_mode
= TYPE_MODE (vectype
);
5754 icode
= (int) optab_handler (optab
, vec_mode
);
5755 if (icode
== CODE_FOR_nothing
)
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5759 "op not supported by target.\n");
5760 /* Check only during analysis. */
5761 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5763 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_NOTE
, vect_location
,
5767 "proceeding using word mode.\n");
5770 /* Worthwhile without SIMD support? Check only during analysis. */
5772 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5773 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5775 if (dump_enabled_p ())
5776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5777 "not worthwhile without SIMD support.\n");
5781 if (!vec_stmt
) /* transformation not required. */
5783 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5784 DUMP_VECT_SCOPE ("vectorizable_shift");
5785 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE
, vect_location
,
5793 "transform binary/unary operation.\n");
5796 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5798 prev_stmt_info
= NULL
;
5799 for (j
= 0; j
< ncopies
; j
++)
5804 if (scalar_shift_arg
)
5806 /* Vector shl and shr insn patterns can be defined with scalar
5807 operand 2 (shift operand). In this case, use constant or loop
5808 invariant op1 directly, without extending it to vector mode
5810 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5811 if (!VECTOR_MODE_P (optab_op2_mode
))
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_NOTE
, vect_location
,
5815 "operand 1 using scalar mode.\n");
5817 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5818 vec_oprnds1
.quick_push (vec_oprnd1
);
5821 /* Store vec_oprnd1 for every vector stmt to be created
5822 for SLP_NODE. We check during the analysis that all
5823 the shift arguments are the same.
5824 TODO: Allow different constants for different vector
5825 stmts generated for an SLP instance. */
5826 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5827 vec_oprnds1
.quick_push (vec_oprnd1
);
5832 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5833 (a special case for certain kind of vector shifts); otherwise,
5834 operand 1 should be of a vector type (the usual case). */
5836 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5839 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5843 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5845 /* Arguments are ready. Create the new vector stmt. */
5846 stmt_vec_info new_stmt_info
= NULL
;
5847 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5849 vop1
= vec_oprnds1
[i
];
5850 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5851 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5852 gimple_assign_set_lhs (new_stmt
, new_temp
);
5854 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5856 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5863 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5865 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5866 prev_stmt_info
= new_stmt_info
;
5869 vec_oprnds0
.release ();
5870 vec_oprnds1
.release ();
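/* Illustrative user-level example (not part of this file): pointer
   arithmetic is vectorized with the ordinary PLUS/MINUS codes, which is why
   vectorizable_operation below maps POINTER_PLUS_EXPR and POINTER_DIFF_EXPR
   onto them before querying target support.  */
void
advance_pointers (int **p, const long *d, int n)
{
  for (int i = 0; i < n; i++)
    p[i] = p[i] + d[i];   /* POINTER_PLUS_EXPR, vectorized as integer PLUS */
}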
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                        stmt_vec_info *vec_stmt, slp_tree slp_node,
                        stmt_vector_for_cost *cost_vec)
{
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !vec_stmt)
    return false;
  /* Is STMT a vectorizable binary/unary operation?   */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }
  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
         invariant value (don't know whether it is a vector
         of booleans or vector of integers).  We use output
         vectype because operations on boolean don't change
         type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
        {
          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not supported operation on bool value.\n");
              return false;
            }
          vectype = vectype_out;
        }
      else
        vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type %T\n",
                         TREE_TYPE (op0));
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;
  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;
  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      target_support_p = (optab_handler (optab, vec_mode)
                          != CODE_FOR_nothing);
    }

  if (!target_support_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
          || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
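
  /* A standalone sketch of the same unrolling in plain C (illustrative only,
     assuming GCC's vector extension and int arrays x and z): with VF == 16
     and nunits == 4, the single scalar statement S2 becomes four vector
     statements per vectorized iteration:

       typedef int v4si __attribute__ ((vector_size (16)));

       for (int j = 0; j < n; j += 16)
         {
           v4si vx0 = *(v4si *) &x[j];      v4si vx1 = *(v4si *) &x[j + 4];
           v4si vx2 = *(v4si *) &x[j + 8];  v4si vx3 = *(v4si *) &x[j + 12];
           *(v4si *) &z[j]      = vx0 + 1;  // VS2_0
           *(v4si *) &z[j + 4]  = vx1 + 1;  // VS2_1
           *(v4si *) &z[j + 8]  = vx2 + 1;  // VS2_2
           *(v4si *) &z[j + 12] = vx3 + 1;  // VS2_3
         }

     The RELATED_STMT chain is what lets this function find vx1..vx3 when
     emitting VS2_1..VS2_3.  */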
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        {
          if (op_type == binary_op)
            vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else if (op_type == ternary_op)
            {
              if (slp_node)
                {
                  auto_vec<tree> ops(3);
                  ops.quick_push (op0);
                  ops.quick_push (op1);
                  ops.quick_push (op2);
                  auto_vec<vec<tree> > vec_defs(3);
                  vect_get_slp_defs (ops, slp_node, &vec_defs);
                  vec_oprnds0 = vec_defs[0];
                  vec_oprnds1 = vec_defs[1];
                  vec_oprnds2 = vec_defs[2];
                }
              else
                {
                  vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
                                     &vec_oprnds1, NULL);
                  vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
                                     NULL, NULL);
                }
            }
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
                               slp_node);
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
                                                                      vec_oprnd));
            }
        }
      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
          gassign *new_stmt = gimple_build_assign (vec_dest, code,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          new_stmt_info
            = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          if (vec_cvt_dest)
            {
              new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
              gassign *new_stmt
                = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
                                       new_temp);
              new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              new_stmt_info
                = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
            }
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
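/* A standalone scalar sketch (not vectorizer internals) of the
   POINTER_DIFF_EXPR handling above: the subtraction is done on the unsigned
   representation of the operands and the bit pattern is then reinterpreted
   as a signed result, which is what the VIEW_CONVERT_EXPR into vectype_out
   models per vector lane.  Uses GCC's predefined pointer-sized integer type
   macros; the function name is illustrative only.  */

static __PTRDIFF_TYPE__
sketch_pointer_diff (const char *a, const char *b)
{
  __UINTPTR_TYPE__ ua = (__UINTPTR_TYPE__) a;
  __UINTPTR_TYPE__ ub = (__UINTPTR_TYPE__) b;
  /* Unsigned subtraction (wraps), then reinterpret as signed.  */
  return (__PTRDIFF_TYPE__) (ua - ub);
}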
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    return;

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
        DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
        {
          SET_DECL_ALIGN (base_decl, align_base_to);
          DECL_USER_ALIGN (base_decl) = 1;
        }
      dr_info->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
          != get_alias_set (DR_REF (next_dr)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "conflicting alias set types.\n");
          return ptr_type_node;
        }
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  machine_mode mode1, mode2;
  poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
  tree offset1, offset2;
  int unsignedp1, unsignedp2, reversep1, reversep2;
  int volatilep1 = 0, volatilep2 = 0;
  tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
                                    &mode1, &unsignedp1, &reversep1,
                                    &volatilep1);
  tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
                                    &mode2, &unsignedp2, &reversep2,
                                    &volatilep2);
  if (reversep1 || reversep2 || volatilep1 || volatilep2)
    return false;
  if (!operand_equal_p (base1, base2, 0))
    return false;
  if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
    return false;
  if (maybe_ne (bitsize1, bitsize2))
    return false;
  if (offset1 != offset2
      && (!offset1
          || !offset2
          || !operand_equal_p (offset1, offset2, 0)))
    return false;
  return true;
}
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */
static int
scan_store_can_perm_p (tree vectype, tree init,
                       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;

  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  for (i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
        for (j = 0; j < nunits; ++j)
          sel[j] = nunits - 1;
      else
        {
          for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
            sel[j] = j;
          for (k = 0; j < nunits; ++j, ++k)
            sel[j] = nunits + k;
        }
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
        {
          if (i == units_log2)
            return -1;

          if (whole_vector_shift_kind == scan_store_kind_perm)
            {
              if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
                return -1;
              whole_vector_shift_kind = scan_store_kind_lshift_zero;
              /* Whole vector shifts shift in zeros, so if init is all zero
                 constant, there is no need to do anything further.  */
              if ((TREE_CODE (init) != INTEGER_CST
                   && TREE_CODE (init) != REAL_CST)
                  || !initializer_zerop (init))
                {
                  tree masktype = build_same_sized_truth_vector_type (vectype);
                  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
                    return -1;
                  whole_vector_shift_kind = scan_store_kind_lshift_cond;
                }
            }
          kind = whole_vector_shift_kind;
        }
      if (use_whole_vector)
        {
          if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
            use_whole_vector->safe_grow_cleared (i);
          if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
            use_whole_vector->safe_push (kind);
        }
    }

  return units_log2;
}
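/* A standalone sketch (illustrative only) of what the permutations checked
   by scan_store_can_perm_p compute: an inclusive scan in log2 (nunits)
   steps, each step combining the vector with a copy of itself shifted up by
   1 << step lanes, with the reduction initializer shifted in.  Assuming INIT
   is the identity of the operation (0 for +), adding it repeatedly is
   harmless.  Scalar model of one 8-lane vector:  */

static void
sketch_inclusive_scan8 (int v[8], int init)
{
  for (int step = 0; step < 3; ++step)  /* log2 (8) == 3 steps.  */
    {
      int shifted[8];
      int amount = 1 << step;
      for (int j = 0; j < 8; ++j)
        shifted[j] = j < amount ? init : v[j - amount];
      for (int j = 0; j < 8; ++j)
        v[j] += shifted[j];
    }
  /* With INIT equal to the identity, v[j] is now the combination of the
     original v[0] .. v[j].  */
}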
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (stmt_vec_info stmt_info, tree vectype,
                  enum vect_def_type rhs_dt, bool slp, tree mask,
                  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type;

  gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
  if (slp
      || mask
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
      || loop_vinfo == NULL
      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
      || !integer_zerop (DR_OFFSET (dr_info->dr))
      || !integer_zerop (DR_INIT (dr_info->dr))
      || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
      || !alias_sets_conflict_p (get_alias_set (vectype),
                                 get_alias_set (TREE_TYPE (ref_type))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported OpenMP scan store.\n");
      return false;
    }
  /* We need to pattern match code built by OpenMP lowering and simplified
     by following optimizations into something we can handle.
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
         r += something ();
         #pragma omp scan inclusive (r)
         use (r);
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       D.2042[_21] = 0;
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       _6 = _4 + r.0_5;
       D.2042[_20] = _6;
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
       _26 = D.2043[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       D.2042[_25] = _28;
       // Actual scan phase:
       ...
       r.1_8 = D.2042[_20];
       ...
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++ish user
     defined reduction, but allow the vectorizer to pattern recognize it
     and turn into the appropriate vectorized scan.

     For exclusive scan, this is slightly different:
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
         use (r);
         #pragma omp scan exclusive (r)
         r += something ();
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       D.2042[_21] = 0;
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       _6 = _4 + r.0_5;
       D.2042[_20] = _6;
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
       _26 = D.2044[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2044[_25] = _28;
       D.2042[_25] = _28;
       // Actual scan phase:
       ...
       r.1_8 = D.2044[_20];
       ...  */
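
  /* A source-level sketch of the inclusive-scan input that produces the IL
     above (the user code is hypothetical; types and names are illustrative):

       int r = 0;
       #pragma omp simd reduction (inscan, +:r)
       for (int i = 0; i < n; i++)
         {
           r += a[i];                   // input phase
           #pragma omp scan inclusive (r)
           b[i] = r;                    // scan phase: a[0] + ... + a[i]
         }

     For the exclusive form the use of r comes first and the directive is
     "#pragma omp scan exclusive (r)", so the scan phase sees
     a[0] + ... + a[i-1].  */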
6555 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6557 /* Match the D.2042[_21] = 0; store above. Just require that
6558 it is a constant or external definition store. */
6559 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6562 if (dump_enabled_p ())
6563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6564 "unsupported OpenMP scan initializer store.\n");
6568 if (! loop_vinfo
->scan_map
)
6569 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6570 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6571 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6574 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6576 /* These stores can be vectorized normally. */
6580 if (rhs_dt
!= vect_internal_def
)
6583 if (dump_enabled_p ())
6584 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6585 "unsupported OpenMP scan combiner pattern.\n");
6589 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6590 tree rhs
= gimple_assign_rhs1 (stmt
);
6591 if (TREE_CODE (rhs
) != SSA_NAME
)
6594 gimple
*other_store_stmt
= NULL
;
6595 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6596 bool inscan_var_store
6597 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6599 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6601 if (!inscan_var_store
)
6603 use_operand_p use_p
;
6604 imm_use_iterator iter
;
6605 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6607 gimple
*use_stmt
= USE_STMT (use_p
);
6608 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6610 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6611 || !is_gimple_assign (use_stmt
)
6612 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6614 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6616 other_store_stmt
= use_stmt
;
6618 if (other_store_stmt
== NULL
)
6620 rhs
= gimple_assign_lhs (other_store_stmt
);
6621 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6625 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6627 use_operand_p use_p
;
6628 imm_use_iterator iter
;
6629 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6631 gimple
*use_stmt
= USE_STMT (use_p
);
6632 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6634 if (other_store_stmt
)
6636 other_store_stmt
= use_stmt
;
6642 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6643 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6644 || !is_gimple_assign (def_stmt
)
6645 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6648 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6649 /* For pointer addition, we should use the normal plus for the vector
6653 case POINTER_PLUS_EXPR
:
6656 case MULT_HIGHPART_EXPR
:
6661 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6664 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6665 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6666 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6669 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6670 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6671 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6672 || !gimple_assign_load_p (load1_stmt
)
6673 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6674 || !gimple_assign_load_p (load2_stmt
))
6677 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6678 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6679 if (load1_stmt_info
== NULL
6680 || load2_stmt_info
== NULL
6681 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6682 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6683 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6684 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6687 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6689 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6690 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6691 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6693 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6695 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6699 use_operand_p use_p
;
6700 imm_use_iterator iter
;
6701 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6703 gimple
*use_stmt
= USE_STMT (use_p
);
6704 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6706 if (other_store_stmt
)
6708 other_store_stmt
= use_stmt
;
6712 if (other_store_stmt
== NULL
)
6714 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6715 || !gimple_store_p (other_store_stmt
))
6718 stmt_vec_info other_store_stmt_info
6719 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6720 if (other_store_stmt_info
== NULL
6721 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6722 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
  gimple *stmt1 = stmt;
  gimple *stmt2 = other_store_stmt;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    std::swap (stmt1, stmt2);
  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
                            gimple_assign_rhs1 (load2_stmt)))
    {
      std::swap (rhs1, rhs2);
      std::swap (load1_stmt, load2_stmt);
      std::swap (load1_stmt_info, load2_stmt_info);
    }
  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
                             gimple_assign_rhs1 (load1_stmt)))
    return false;
6740 tree var3
= NULL_TREE
;
6741 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6742 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6743 gimple_assign_rhs1 (load2_stmt
)))
6745 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6747 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6748 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6749 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6751 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6752 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6753 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6754 || lookup_attribute ("omp simd inscan exclusive",
6755 DECL_ATTRIBUTES (var3
)))
6759 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6760 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6761 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6764 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6765 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6766 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6767 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6768 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6769 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6772 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6773 std::swap (var1
, var2
);
6775 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6777 if (!lookup_attribute ("omp simd inscan exclusive",
6778 DECL_ATTRIBUTES (var1
)))
6783 if (loop_vinfo
->scan_map
== NULL
)
6785 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
  /* The IL is as expected, now check if we can actually vectorize it.
     Inclusive scan:
       _26 = D.2043[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       D.2042[_25] = _28;
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = _31 + _32;
       // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
       _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       _35 = _33 + _34;
       // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
       _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       _37 = _35 + _36;
       // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
       _38 = _30 + _37;
       _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2043] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _38;
     Exclusive scan:
       _26 = D.2044[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2044[_25] = _28;
       D.2042[_25] = _28;
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _34 = _32 + _33;
       // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
       //         _31[3]+_31[4], ... _31[5]+.._31[6] };
       _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       _36 = _34 + _35;
       // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
       _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       _38 = _36 + _37;
       // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
       _39 = _30 + _38;
       _50 = _31 + _39;
       _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2044] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _51;  */
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
    return false;

  int units_log2 = scan_store_can_perm_p (vectype, *init);
  if (units_log2 == -1)
    return false;

  return true;
}
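/* A standalone scalar sketch (illustrative only) of the exclusive-scan
   semantics described in the comment above: each lane receives the
   combination of the lanes strictly below it, with the initializer for
   lane 0.  In the vectorized form this is achieved by the extra up-front
   whole-vector VEC_PERM_EXPR that shifts everything up by one lane.  */

static void
sketch_exclusive_scan8 (int v[8], int init)
{
  int acc = init;
  for (int j = 0; j < 8; ++j)
    {
      int cur = v[j];
      v[j] = acc;       /* lane j gets init + v[0] + ... + v[j-1].  */
      acc += cur;
    }
}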
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         stmt_vec_info *vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  vec_info *vinfo = stmt_info->vinfo;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform scan store. ncopies = %d\n", ncopies);

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);

  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6883 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6885 use_operand_p use_p
;
6886 imm_use_iterator iter
;
6887 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6889 gimple
*use_stmt
= USE_STMT (use_p
);
6890 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6892 rhs
= gimple_assign_lhs (use_stmt
);
6897 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6898 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6899 if (code
== POINTER_PLUS_EXPR
)
6901 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
6902 && commutative_tree_code (code
));
6903 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6904 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6905 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
6906 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6907 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6908 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6909 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6910 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6911 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6912 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6913 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6915 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6917 std::swap (rhs1
, rhs2
);
6918 std::swap (var1
, var2
);
6919 std::swap (load1_dr_info
, load2_dr_info
);
6922 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6925 unsigned HOST_WIDE_INT nunits
;
6926 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6928 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
6929 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
6930 gcc_assert (units_log2
> 0);
6931 auto_vec
<tree
, 16> perms
;
6932 perms
.quick_grow (units_log2
+ 1);
6933 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
6934 for (int i
= 0; i
<= units_log2
; ++i
)
6936 unsigned HOST_WIDE_INT j
, k
;
6937 vec_perm_builder
sel (nunits
, nunits
, 1);
6938 sel
.quick_grow (nunits
);
6939 if (i
== units_log2
)
6940 for (j
= 0; j
< nunits
; ++j
)
6941 sel
[j
] = nunits
- 1;
6944 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6946 for (k
= 0; j
< nunits
; ++j
, ++k
)
6947 sel
[j
] = nunits
+ k
;
6949 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6950 if (!use_whole_vector
.is_empty ()
6951 && use_whole_vector
[i
] != scan_store_kind_perm
)
6953 if (zero_vec
== NULL_TREE
)
6954 zero_vec
= build_zero_cst (vectype
);
6955 if (masktype
== NULL_TREE
6956 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
6957 masktype
= build_same_sized_truth_vector_type (vectype
);
6958 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
6961 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
6964 stmt_vec_info prev_stmt_info
= NULL
;
6965 tree vec_oprnd1
= NULL_TREE
;
6966 tree vec_oprnd2
= NULL_TREE
;
6967 tree vec_oprnd3
= NULL_TREE
;
6968 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
6969 tree dataref_offset
= build_int_cst (ref_type
, 0);
6970 tree bump
= vect_get_data_ptr_increment (dr_info
, vectype
, VMAT_CONTIGUOUS
);
6971 tree ldataref_ptr
= NULL_TREE
;
6972 tree orig
= NULL_TREE
;
6973 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6974 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
6975 for (int j
= 0; j
< ncopies
; j
++)
6977 stmt_vec_info new_stmt_info
;
6980 vec_oprnd1
= vect_get_vec_def_for_operand (*init
, stmt_info
);
6981 if (ldataref_ptr
== NULL
)
6982 vec_oprnd2
= vect_get_vec_def_for_operand (rhs1
, stmt_info
);
6983 vec_oprnd3
= vect_get_vec_def_for_operand (rhs2
, stmt_info
);
6988 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
6989 if (ldataref_ptr
== NULL
)
6990 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
6991 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
6992 if (!inscan_var_store
)
6993 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
6998 vec_oprnd2
= make_ssa_name (vectype
);
6999 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7000 unshare_expr (ldataref_ptr
),
7002 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7003 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7004 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7005 if (prev_stmt_info
== NULL
)
7006 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7008 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7009 prev_stmt_info
= new_stmt_info
;
7012 tree v
= vec_oprnd2
;
7013 for (int i
= 0; i
< units_log2
; ++i
)
7015 tree new_temp
= make_ssa_name (vectype
);
7016 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7018 && (use_whole_vector
[i
]
7019 != scan_store_kind_perm
))
7020 ? zero_vec
: vec_oprnd1
, v
,
7022 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7023 if (prev_stmt_info
== NULL
)
7024 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7026 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7027 prev_stmt_info
= new_stmt_info
;
7029 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7031 /* Whole vector shift shifted in zero bits, but if *init
7032 is not initializer_zerop, we need to replace those elements
7033 with elements from vec_oprnd1. */
7034 tree_vector_builder
vb (masktype
, nunits
, 1);
7035 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7036 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7037 ? boolean_false_node
: boolean_true_node
);
7039 tree new_temp2
= make_ssa_name (vectype
);
7040 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7041 new_temp
, vec_oprnd1
);
7042 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7043 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7044 prev_stmt_info
= new_stmt_info
;
7045 new_temp
= new_temp2
;
7048 /* For exclusive scan, perform the perms[i] permutation once
7051 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7059 tree new_temp2
= make_ssa_name (vectype
);
7060 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7061 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7062 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7063 prev_stmt_info
= new_stmt_info
;
7068 tree new_temp
= make_ssa_name (vectype
);
7069 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7070 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7071 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7072 prev_stmt_info
= new_stmt_info
;
7074 tree last_perm_arg
= new_temp
;
7075 /* For exclusive scan, new_temp computed above is the exclusive scan
7076 prefix sum. Turn it into inclusive prefix sum for the broadcast
7077 of the last element into orig. */
7078 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7080 last_perm_arg
= make_ssa_name (vectype
);
7081 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7082 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7083 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7084 prev_stmt_info
= new_stmt_info
;
7087 orig
= make_ssa_name (vectype
);
7088 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7089 last_perm_arg
, perms
[units_log2
]);
7090 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7091 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7092 prev_stmt_info
= new_stmt_info
;
7094 if (!inscan_var_store
)
7096 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7097 unshare_expr (dataref_ptr
),
7099 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7100 g
= gimple_build_assign (data_ref
, new_temp
);
7101 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7102 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7103 prev_stmt_info
= new_stmt_info
;
7107 if (inscan_var_store
)
7108 for (int j
= 0; j
< ncopies
; j
++)
7111 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7113 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7114 unshare_expr (dataref_ptr
),
7116 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7117 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7118 stmt_vec_info new_stmt_info
7119 = vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7120 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7121 prev_stmt_info
= new_stmt_info
;
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    stmt_vec_info *vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)
{
7142 tree vec_oprnd
= NULL_TREE
;
7144 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7145 struct loop
*loop
= NULL
;
7146 machine_mode vec_mode
;
7148 enum dr_alignment_support alignment_support_scheme
;
7149 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7150 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7151 stmt_vec_info prev_stmt_info
= NULL
;
7152 tree dataref_ptr
= NULL_TREE
;
7153 tree dataref_offset
= NULL_TREE
;
7154 gimple
*ptr_incr
= NULL
;
7157 stmt_vec_info first_stmt_info
;
7159 unsigned int group_size
, i
;
7160 vec
<tree
> oprnds
= vNULL
;
7161 vec
<tree
> result_chain
= vNULL
;
7162 tree offset
= NULL_TREE
;
7163 vec
<tree
> vec_oprnds
= vNULL
;
7164 bool slp
= (slp_node
!= NULL
);
7165 unsigned int vec_num
;
7166 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7167 vec_info
*vinfo
= stmt_info
->vinfo
;
7169 gather_scatter_info gs_info
;
7171 vec_load_store_type vls_type
;
7174 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7177 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7181 /* Is vectorizable store? */
7183 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7184 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7186 tree scalar_dest
= gimple_assign_lhs (assign
);
7187 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7188 && is_pattern_stmt_p (stmt_info
))
7189 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7190 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7191 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7192 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7193 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7194 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7195 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7196 && TREE_CODE (scalar_dest
) != MEM_REF
)
7201 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7202 if (!call
|| !gimple_call_internal_p (call
))
7205 internal_fn ifn
= gimple_call_internal_fn (call
);
7206 if (!internal_store_fn_p (ifn
))
7209 if (slp_node
!= NULL
)
7211 if (dump_enabled_p ())
7212 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7213 "SLP of masked stores not supported.\n");
7217 int mask_index
= internal_fn_mask_index (ifn
);
7218 if (mask_index
>= 0)
7220 mask
= gimple_call_arg (call
, mask_index
);
7221 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7227 op
= vect_get_store_rhs (stmt_info
);
7229 /* Cannot have hybrid store SLP -- that would mean storing to the
7230 same location twice. */
7231 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7233 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7234 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7238 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7239 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7244 /* Multiple types in SLP are handled by creating the appropriate number of
7245 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7250 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7252 gcc_assert (ncopies
>= 1);
7254 /* FORNOW. This restriction should be relaxed. */
7255 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7257 if (dump_enabled_p ())
7258 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7259 "multiple types in nested loop.\n");
7263 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7266 elem_type
= TREE_TYPE (vectype
);
7267 vec_mode
= TYPE_MODE (vectype
);
7269 if (!STMT_VINFO_DATA_REF (stmt_info
))
7272 vect_memory_access_type memory_access_type
;
7273 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
7274 &memory_access_type
, &gs_info
))
7279 if (memory_access_type
== VMAT_CONTIGUOUS
)
7281 if (!VECTOR_MODE_P (vec_mode
)
7282 || !can_vec_mask_load_store_p (vec_mode
,
7283 TYPE_MODE (mask_vectype
), false))
7286 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7287 && (memory_access_type
!= VMAT_GATHER_SCATTER
7288 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7290 if (dump_enabled_p ())
7291 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7292 "unsupported access type for masked store.\n");
7298 /* FORNOW. In some cases can vectorize even if data-type not supported
7299 (e.g. - array initialization with 0). */
7300 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7304 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7305 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7306 && memory_access_type
!= VMAT_GATHER_SCATTER
7307 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7310 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7311 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7312 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7316 first_stmt_info
= stmt_info
;
7317 first_dr_info
= dr_info
;
7318 group_size
= vec_num
= 1;
7321 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7323 if (!check_scan_store (stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7324 memory_access_type
))
7328 if (!vec_stmt
) /* transformation not required. */
7330 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7333 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7334 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7335 memory_access_type
, &gs_info
);
7337 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7338 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
7339 vls_type
, slp_node
, cost_vec
);
7342 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7346 ensure_base_align (dr_info
);
7348 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7350 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7351 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7352 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7353 tree ptr
, var
, scale
, vec_mask
;
7354 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7355 tree mask_halfvectype
= mask_vectype
;
7356 edge pe
= loop_preheader_edge (loop
);
7359 enum { NARROW
, NONE
, WIDEN
} modifier
;
7360 poly_uint64 scatter_off_nunits
7361 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7363 if (known_eq (nunits
, scatter_off_nunits
))
7365 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7369 /* Currently gathers and scatters are only supported for
7370 fixed-length vectors. */
7371 unsigned int count
= scatter_off_nunits
.to_constant ();
7372 vec_perm_builder
sel (count
, count
, 1);
7373 for (i
= 0; i
< (unsigned int) count
; ++i
)
7374 sel
.quick_push (i
| (count
/ 2));
7376 vec_perm_indices
indices (sel
, 1, count
);
7377 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7379 gcc_assert (perm_mask
!= NULL_TREE
);
7381 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7385 /* Currently gathers and scatters are only supported for
7386 fixed-length vectors. */
7387 unsigned int count
= nunits
.to_constant ();
7388 vec_perm_builder
sel (count
, count
, 1);
7389 for (i
= 0; i
< (unsigned int) count
; ++i
)
7390 sel
.quick_push (i
| (count
/ 2));
7392 vec_perm_indices
indices (sel
, 2, count
);
7393 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7394 gcc_assert (perm_mask
!= NULL_TREE
);
7399 = build_same_sized_truth_vector_type (gs_info
.offset_vectype
);
7404 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7405 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7406 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7407 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7408 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7409 scaletype
= TREE_VALUE (arglist
);
7411 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7412 && TREE_CODE (rettype
) == VOID_TYPE
);
7414 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7415 if (!is_gimple_min_invariant (ptr
))
7417 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7418 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7419 gcc_assert (!new_bb
);
7422 if (mask
== NULL_TREE
)
7424 mask_arg
= build_int_cst (masktype
, -1);
7425 mask_arg
= vect_init_vector (stmt_info
, mask_arg
, masktype
, NULL
);
7428 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7430 prev_stmt_info
= NULL
;
7431 for (j
= 0; j
< ncopies
; ++j
)
7435 src
= vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt_info
);
7436 op
= vec_oprnd0
= vect_get_vec_def_for_operand (gs_info
.offset
,
7439 mask_op
= vec_mask
= vect_get_vec_def_for_operand (mask
,
7442 else if (modifier
!= NONE
&& (j
& 1))
7444 if (modifier
== WIDEN
)
7447 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7449 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
7453 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7456 else if (modifier
== NARROW
)
7458 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
7460 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7468 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7470 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7473 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7477 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7479 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7480 TYPE_VECTOR_SUBPARTS (srctype
)));
7481 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7482 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7484 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7485 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7489 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7491 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7492 TYPE_VECTOR_SUBPARTS (idxtype
)));
7493 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7494 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7496 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7497 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7505 if (modifier
== NARROW
)
7507 var
= vect_get_new_ssa_name (mask_halfvectype
,
7510 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7511 : VEC_UNPACK_LO_EXPR
,
7513 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7516 tree optype
= TREE_TYPE (mask_arg
);
7517 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7520 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7521 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7522 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7524 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7525 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7527 if (!useless_type_conversion_p (masktype
, utype
))
7529 gcc_assert (TYPE_PRECISION (utype
)
7530 <= TYPE_PRECISION (masktype
));
7531 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7532 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7533 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7539 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7540 stmt_vec_info new_stmt_info
7541 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7543 if (prev_stmt_info
== NULL
)
7544 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7546 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7547 prev_stmt_info
= new_stmt_info
;
7551 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7552 return vectorizable_scan_store (stmt_info
, gsi
, vec_stmt
, ncopies
);
7554 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7555 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7560 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7562 /* We vectorize all the stmts of the interleaving group when we
7563 reach the last stmt in the group. */
7564 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7565 < DR_GROUP_SIZE (first_stmt_info
)
7574 grouped_store
= false;
7575 /* VEC_NUM is the number of vect stmts to be created for this
7577 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7578 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7579 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7580 == first_stmt_info
);
7581 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7582 op
= vect_get_store_rhs (first_stmt_info
);
7585 /* VEC_NUM is the number of vect stmts to be created for this
7587 vec_num
= group_size
;
7589 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7592 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7594 if (dump_enabled_p ())
7595 dump_printf_loc (MSG_NOTE
, vect_location
,
7596 "transform store. ncopies = %d\n", ncopies
);
7598 if (memory_access_type
== VMAT_ELEMENTWISE
7599 || memory_access_type
== VMAT_STRIDED_SLP
)
7601 gimple_stmt_iterator incr_gsi
;
7607 tree stride_base
, stride_step
, alias_off
;
7610 /* Checked by get_load_store_type. */
7611 unsigned int const_nunits
= nunits
.to_constant ();
7613 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7614 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7617 = fold_build_pointer_plus
7618 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7619 size_binop (PLUS_EXPR
,
7620 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
7621 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7622 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             tmp1 = vectorized_value (j);
             tmp2 = vectorized_value (j + 1);
             ...
             array[j] = tmp1;
             array[j + stride] = tmp2;
             ...  */
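
      /* A standalone sketch of that transformation in plain C (illustrative
         only; v4si stands for a 4 x int vector type and vectorized_value for
         the vectorized rhs computation), for VF == 4:

           for (j = 0; j < n; j += 4 * stride)
             {
               v4si tmp = vectorized_value (j);
               array[j + 0 * stride] = tmp[0];
               array[j + 1 * stride] = tmp[1];
               array[j + 2 * stride] = tmp[2];
               array[j + 3 * stride] = tmp[3];
             }  */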
7642 unsigned nstores
= const_nunits
;
7644 tree ltype
= elem_type
;
7645 tree lvectype
= vectype
;
7648 if (group_size
< const_nunits
7649 && const_nunits
% group_size
== 0)
7651 nstores
= const_nunits
/ group_size
;
7653 ltype
= build_vector_type (elem_type
, group_size
);
7656 /* First check if vec_extract optab doesn't support extraction
7657 of vector elts directly. */
7658 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7660 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
7661 || !VECTOR_MODE_P (vmode
)
7662 || !targetm
.vector_mode_supported_p (vmode
)
7663 || (convert_optab_handler (vec_extract_optab
,
7664 TYPE_MODE (vectype
), vmode
)
7665 == CODE_FOR_nothing
))
7667 /* Try to avoid emitting an extract of vector elements
7668 by performing the extracts using an integer type of the
7669 same size, extracting from a vector of those and then
7670 re-interpreting it as the original vector type if
7673 = group_size
* GET_MODE_BITSIZE (elmode
);
7674 unsigned int lnunits
= const_nunits
/ group_size
;
7675 /* If we can't construct such a vector fall back to
7676 element extracts from the original vector type and
7677 element size stores. */
7678 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7679 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7680 && VECTOR_MODE_P (vmode
)
7681 && targetm
.vector_mode_supported_p (vmode
)
7682 && (convert_optab_handler (vec_extract_optab
,
7684 != CODE_FOR_nothing
))
7688 ltype
= build_nonstandard_integer_type (lsize
, 1);
7689 lvectype
= build_vector_type (ltype
, nstores
);
7691 /* Else fall back to vector extraction anyway.
7692 Fewer stores are more important than avoiding spilling
7693 of the vector we extract from. Compared to the
7694 construction case in vectorizable_load no store-forwarding
7695 issue exists here for reasonable archs. */
7698 else if (group_size
>= const_nunits
7699 && group_size
% const_nunits
== 0)
7702 lnel
= const_nunits
;
7706 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7707 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7710 ivstep
= stride_step
;
7711 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7712 build_int_cst (TREE_TYPE (ivstep
), vf
));
7714 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7716 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7717 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7718 create_iv (stride_base
, ivstep
, NULL
,
7719 loop
, &incr_gsi
, insert_after
,
7721 incr
= gsi_stmt (incr_gsi
);
7722 loop_vinfo
->add_stmt (incr
);
7724 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7726 prev_stmt_info
= NULL
;
7727 alias_off
= build_int_cst (ref_type
, 0);
7728 stmt_vec_info next_stmt_info
= first_stmt_info
;
7729 for (g
= 0; g
< group_size
; g
++)
7731 running_off
= offvar
;
7734 tree size
= TYPE_SIZE_UNIT (ltype
);
7735 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7737 tree newoff
= copy_ssa_name (running_off
, NULL
);
7738 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7740 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7741 running_off
= newoff
;
7743 unsigned int group_el
= 0;
7744 unsigned HOST_WIDE_INT
7745 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7746 for (j
= 0; j
< ncopies
; j
++)
7748 /* We've set op and dt above, from vect_get_store_rhs,
7749 and first_stmt_info == stmt_info. */
7754 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
7755 &vec_oprnds
, NULL
, slp_node
);
7756 vec_oprnd
= vec_oprnds
[0];
7760 op
= vect_get_store_rhs (next_stmt_info
);
7761 vec_oprnd
= vect_get_vec_def_for_operand
7762 (op
, next_stmt_info
);
7768 vec_oprnd
= vec_oprnds
[j
];
7770 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
7773 /* Pun the vector to extract from if necessary. */
7774 if (lvectype
!= vectype
)
7776 tree tem
= make_ssa_name (lvectype
);
7778 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7779 lvectype
, vec_oprnd
));
7780 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
7783 for (i
= 0; i
< nstores
; i
++)
7785 tree newref
, newoff
;
7786 gimple
*incr
, *assign
;
7787 tree size
= TYPE_SIZE (ltype
);
7788 /* Extract the i'th component. */
7789 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7790 bitsize_int (i
), size
);
7791 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7794 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7798 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7800 newref
= build2 (MEM_REF
, ltype
,
7801 running_off
, this_off
);
7802 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7804 /* And store it to *running_off. */
7805 assign
= gimple_build_assign (newref
, elem
);
7806 stmt_vec_info assign_info
7807 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
7811 || group_el
== group_size
)
7813 newoff
= copy_ssa_name (running_off
, NULL
);
7814 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7815 running_off
, stride_step
);
7816 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7818 running_off
= newoff
;
7821 if (g
== group_size
- 1
7824 if (j
== 0 && i
== 0)
7825 STMT_VINFO_VEC_STMT (stmt_info
)
7826 = *vec_stmt
= assign_info
;
7828 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
7829 prev_stmt_info
= assign_info
;
7833 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7838 vec_oprnds
.release ();
7842 auto_vec
<tree
> dr_chain (group_size
);
7843 oprnds
.create (group_size
);
7845 alignment_support_scheme
7846 = vect_supportable_dr_alignment (first_dr_info
, false);
7847 gcc_assert (alignment_support_scheme
);
7848 vec_loop_masks
*loop_masks
7849 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7850 ? &LOOP_VINFO_MASKS (loop_vinfo
)
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
7855 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7858 || alignment_support_scheme
== dr_aligned
7859 || alignment_support_scheme
== dr_unaligned_supported
);
7861 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7862 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7863 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7866 tree vec_offset
= NULL_TREE
;
7867 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7869 aggr_type
= NULL_TREE
;
7872 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7874 aggr_type
= elem_type
;
7875 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7876 &bump
, &vec_offset
);
7880 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7881 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7883 aggr_type
= vectype
;
7884 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
7885 memory_access_type
);
7889 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */
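  /* An illustrative sketch, assuming nunits == 4 (e.g. V4SI) and VF == 8;
     the types and numbers are only an example, not taken from the code
     below:

	 for (i = 0; i < n; i++)	   for (i = 0; i < n; i += 8)
	   a[i] = x;		     =>	     a[i   .. i+3] = vx0;
					     a[i+4 .. i+7] = vx1;

     i.e. ncopies = VF/nunits = 2 vector stores are emitted per vectorized
     iteration, chained through STMT_VINFO_RELATED_STMT.  */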
  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
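  /* An illustrative sketch of the permutation masks, assuming a group of
     two interleaved stores a[2*i] = x, a[2*i+1] = y and 8-element vectors
     vx = {x0,...,x7}, vy = {y0,...,y7} (the names are only an example):

	 lo = VEC_PERM_EXPR < vx, vy, {0, 8, 1, 9, 2, 10, 3, 11} >
	    == {x0, y0, x1, y1, x2, y2, x3, y3}
	 hi = VEC_PERM_EXPR < vx, vy, {4, 12, 5, 13, 6, 14, 7, 15} >
	    == {x4, y4, x5, y5, x6, y6, x7, y7}

     Indices 0..7 select from the first operand and 8..15 from the second,
     so storing lo and hi back-to-back produces the interleaved layout
     x0 y0 x1 y1 ...  */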
7930 prev_stmt_info
= NULL
;
7931 tree vec_mask
= NULL_TREE
;
7932 for (j
= 0; j
< ncopies
; j
++)
7934 stmt_vec_info new_stmt_info
;
7939 /* Get vectorized arguments for SLP_NODE. */
7940 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
7943 vec_oprnd
= vec_oprnds
[0];
	  /* For interleaved stores we collect vectorized defs for all the
	     stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
	     used as an input to vect_permute_store_chain(), and OPRNDS as
	     an input to vect_get_vec_def_for_stmt_copy() for the next copy.

	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
7954 stmt_vec_info next_stmt_info
= first_stmt_info
;
7955 for (i
= 0; i
< group_size
; i
++)
	      /* Since gaps are not supported for interleaved stores,
		 DR_GROUP_SIZE is the exact number of stmts in the chain.
		 Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
		 that there is no interleaving, DR_GROUP_SIZE is 1,
		 and only one iteration of the loop will be executed.  */
7962 op
= vect_get_store_rhs (next_stmt_info
);
7963 vec_oprnd
= vect_get_vec_def_for_operand
7964 (op
, next_stmt_info
);
7965 dr_chain
.quick_push (vec_oprnd
);
7966 oprnds
.quick_push (vec_oprnd
);
7967 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7970 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
	  /* We should have caught mismatched types earlier.  */
7975 gcc_assert (useless_type_conversion_p (vectype
,
7976 TREE_TYPE (vec_oprnd
)));
7977 bool simd_lane_access_p
7978 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
7979 if (simd_lane_access_p
7981 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
7982 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
7983 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
7984 && integer_zerop (DR_INIT (first_dr_info
->dr
))
7985 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7986 get_alias_set (TREE_TYPE (ref_type
))))
7988 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
7989 dataref_offset
= build_int_cst (ref_type
, 0);
7991 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7992 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
7993 &dataref_ptr
, &vec_offset
);
7996 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
7997 simd_lane_access_p
? loop
: NULL
,
7998 offset
, &dummy
, gsi
, &ptr_incr
,
7999 simd_lane_access_p
, NULL_TREE
, bump
);
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
	     next copy.
	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
8010 for (i
= 0; i
< group_size
; i
++)
8013 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8014 dr_chain
[i
] = vec_oprnd
;
8015 oprnds
[i
] = vec_oprnd
;
8018 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8021 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8022 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8023 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8025 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8029 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8033 /* Get an array into which we can store the individual vectors. */
8034 vec_array
= create_vector_array (vectype
, vec_num
);
	      /* Invalidate the current contents of VEC_ARRAY.  This should
		 become an RTL clobber too, which prevents the vector registers
		 from being upward-exposed.  */
8039 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8041 /* Store the individual vectors into the array. */
8042 for (i
= 0; i
< vec_num
; i
++)
8044 vec_oprnd
= dr_chain
[i
];
8045 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
8048 tree final_mask
= NULL
;
8050 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8053 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8060 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8062 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8063 tree alias_ptr
= build_int_cst (ref_type
, align
);
8064 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8065 dataref_ptr
, alias_ptr
,
8066 final_mask
, vec_array
);
8071 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8072 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8073 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8075 gimple_call_set_lhs (call
, data_ref
);
8077 gimple_call_set_nothrow (call
, true);
8078 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8080 /* Record that VEC_ARRAY is now dead. */
8081 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8085 new_stmt_info
= NULL
;
8089 result_chain
.create (group_size
);
8091 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
8095 stmt_vec_info next_stmt_info
= first_stmt_info
;
8096 for (i
= 0; i
< vec_num
; i
++)
8099 unsigned HOST_WIDE_INT align
;
8101 tree final_mask
= NULL_TREE
;
8103 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8105 vectype
, vec_num
* j
+ i
);
8107 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8110 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8112 tree scale
= size_int (gs_info
.scale
);
8115 call
= gimple_build_call_internal
8116 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8117 scale
, vec_oprnd
, final_mask
);
8119 call
= gimple_build_call_internal
8120 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8122 gimple_call_set_nothrow (call
, true);
8124 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8129 /* Bump the vector pointer. */
8130 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8134 vec_oprnd
= vec_oprnds
[i
];
8135 else if (grouped_store
)
8136 /* For grouped stores vectorized defs are interleaved in
8137 vect_permute_store_chain(). */
8138 vec_oprnd
= result_chain
[i
];
8140 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8141 if (aligned_access_p (first_dr_info
))
8143 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8145 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
8149 misalign
= DR_MISALIGNMENT (first_dr_info
);
8150 if (dataref_offset
== NULL_TREE
8151 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8152 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8155 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8157 tree perm_mask
= perm_mask_for_reverse (vectype
);
8158 tree perm_dest
= vect_create_destination_var
8159 (vect_get_store_rhs (stmt_info
), vectype
);
8160 tree new_temp
= make_ssa_name (perm_dest
);
8162 /* Generate the permute statement. */
8164 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8165 vec_oprnd
, perm_mask
);
8166 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8168 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8169 vec_oprnd
= new_temp
;
8172 /* Arguments are ready. Create the new vector stmt. */
8175 align
= least_bit_hwi (misalign
| align
);
8176 tree ptr
= build_int_cst (ref_type
, align
);
8178 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8180 final_mask
, vec_oprnd
);
8181 gimple_call_set_nothrow (call
, true);
8183 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8187 data_ref
= fold_build2 (MEM_REF
, vectype
,
8191 : build_int_cst (ref_type
, 0));
8192 if (aligned_access_p (first_dr_info
))
8194 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8195 TREE_TYPE (data_ref
)
8196 = build_aligned_type (TREE_TYPE (data_ref
),
8197 align
* BITS_PER_UNIT
);
8199 TREE_TYPE (data_ref
)
8200 = build_aligned_type (TREE_TYPE (data_ref
),
8201 TYPE_ALIGN (elem_type
));
8202 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8204 = gimple_build_assign (data_ref
, vec_oprnd
);
8206 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8212 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8213 if (!next_stmt_info
)
8220 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8222 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8223 prev_stmt_info
= new_stmt_info
;
8228 result_chain
.release ();
8229 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}
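/* An illustrative note on the mask semantics (not additional code): for
   z = VEC_PERM_EXPR <x, y, sel> with N-element vectors, each output lane
   selects one lane of the concatenation {x, y}:

	z[i] = sel[i] < N ? x[sel[i]] : y[sel[i] - N];

   e.g. for V4SI, sel = {3, 2, 1, 0} reverses X, and sel = {0, 4, 1, 5}
   interleaves the low halves of X and Y.  */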
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
/* Given a vector variable X and Y, that was generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
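/* An illustrative sketch (the SSA names are only an example): given

	loop:
	  _1 = &a[k_invariant];
	  x_2 = *_1;

   the definition of _1 is moved onto the preheader edge, after which the
   invariant load x_2 = *_1 itself can be hoisted by the VMAT_INVARIANT
   handling in vectorizable_load below.  */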
/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   stmt_vec_info *vec_stmt, slp_tree slp_node,
		   slp_instance slp_node_instance,
		   stmt_vector_for_cost *cost_vec)
{
8358 tree vec_dest
= NULL
;
8359 tree data_ref
= NULL
;
8360 stmt_vec_info prev_stmt_info
;
8361 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8362 struct loop
*loop
= NULL
;
8363 struct loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8364 bool nested_in_vect_loop
= false;
8369 enum dr_alignment_support alignment_support_scheme
;
8370 tree dataref_ptr
= NULL_TREE
;
8371 tree dataref_offset
= NULL_TREE
;
8372 gimple
*ptr_incr
= NULL
;
8375 unsigned int group_size
;
8376 poly_uint64 group_gap_adj
;
8377 tree msq
= NULL_TREE
, lsq
;
8378 tree offset
= NULL_TREE
;
8379 tree byte_offset
= NULL_TREE
;
8380 tree realignment_token
= NULL_TREE
;
8382 vec
<tree
> dr_chain
= vNULL
;
8383 bool grouped_load
= false;
8384 stmt_vec_info first_stmt_info
;
8385 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8386 bool compute_in_loop
= false;
8387 struct loop
*at_loop
;
8389 bool slp
= (slp_node
!= NULL
);
8390 bool slp_perm
= false;
8391 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8394 gather_scatter_info gs_info
;
8395 vec_info
*vinfo
= stmt_info
->vinfo
;
8397 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8399 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8402 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8406 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8407 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8409 scalar_dest
= gimple_assign_lhs (assign
);
8410 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8413 tree_code code
= gimple_assign_rhs_code (assign
);
8414 if (code
!= ARRAY_REF
8415 && code
!= BIT_FIELD_REF
8416 && code
!= INDIRECT_REF
8417 && code
!= COMPONENT_REF
8418 && code
!= IMAGPART_EXPR
8419 && code
!= REALPART_EXPR
8421 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8426 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8427 if (!call
|| !gimple_call_internal_p (call
))
8430 internal_fn ifn
= gimple_call_internal_fn (call
);
8431 if (!internal_load_fn_p (ifn
))
8434 scalar_dest
= gimple_call_lhs (call
);
8438 int mask_index
= internal_fn_mask_index (ifn
);
8439 if (mask_index
>= 0)
8441 mask
= gimple_call_arg (call
, mask_index
);
8442 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
8448 if (!STMT_VINFO_DATA_REF (stmt_info
))
8451 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8452 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8456 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8457 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8458 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);
8471 gcc_assert (ncopies
>= 1);
8473 /* FORNOW. This restriction should be relaxed. */
8474 if (nested_in_vect_loop
&& ncopies
> 1)
8476 if (dump_enabled_p ())
8477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8478 "multiple types in nested loop.\n");
8482 /* Invalidate assumptions made by dependence analysis when vectorization
8483 on the unrolled body effectively re-orders stmts. */
8485 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8486 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8487 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8489 if (dump_enabled_p ())
8490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8491 "cannot perform implicit CSE when unrolling "
8492 "with negative dependence distance\n");
8496 elem_type
= TREE_TYPE (vectype
);
8497 mode
= TYPE_MODE (vectype
);
8499 /* FORNOW. In some cases can vectorize even if data-type not supported
8500 (e.g. - data copies). */
8501 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8503 if (dump_enabled_p ())
8504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8505 "Aligned load, but unsupported type.\n");
8509 /* Check if the load is a part of an interleaving chain. */
8510 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8512 grouped_load
= true;
8514 gcc_assert (!nested_in_vect_loop
);
8515 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8517 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8518 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8520 /* Refuse non-SLP vectorization of SLP-only groups. */
8521 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8523 if (dump_enabled_p ())
8524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8525 "cannot vectorize load in non-SLP mode.\n");
8529 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8532 /* Invalidate assumptions made by dependence analysis when vectorization
8533 on the unrolled body effectively re-orders stmts. */
8534 if (!PURE_SLP_STMT (stmt_info
)
8535 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8536 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8537 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8539 if (dump_enabled_p ())
8540 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8541 "cannot perform implicit CSE when performing "
8542 "group loads with negative dependence distance\n");
8549 vect_memory_access_type memory_access_type
;
8550 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
8551 &memory_access_type
, &gs_info
))
8556 if (memory_access_type
== VMAT_CONTIGUOUS
)
8558 machine_mode vec_mode
= TYPE_MODE (vectype
);
8559 if (!VECTOR_MODE_P (vec_mode
)
8560 || !can_vec_mask_load_store_p (vec_mode
,
8561 TYPE_MODE (mask_vectype
), true))
8564 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8565 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8567 if (dump_enabled_p ())
8568 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8569 "unsupported access type for masked load.\n");
8574 if (!vec_stmt
) /* transformation not required. */
8577 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8580 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8581 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8582 memory_access_type
, &gs_info
);
8584 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8585 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
8586 slp_node_instance
, slp_node
, cost_vec
);
8591 gcc_assert (memory_access_type
8592 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8594 if (dump_enabled_p ())
8595 dump_printf_loc (MSG_NOTE
, vect_location
,
8596 "transform load. ncopies = %d\n", ncopies
);
8600 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8601 ensure_base_align (dr_info
);
8603 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8605 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8609 if (memory_access_type
== VMAT_INVARIANT
)
8611 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.  */
8616 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8617 && !nested_in_vect_loop
8618 && hoist_defs_of_uses (stmt_info
, loop
));
8621 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8622 if (dump_enabled_p ())
8623 dump_printf_loc (MSG_NOTE
, vect_location
,
8624 "hoisting out of the vectorized loop: %G", stmt
);
8625 scalar_dest
= copy_ssa_name (scalar_dest
);
8626 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8627 gsi_insert_on_edge_immediate
8628 (loop_preheader_edge (loop
),
8629 gimple_build_assign (scalar_dest
, rhs
));
      /* These copies are all equivalent, but currently the representation
	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
8633 prev_stmt_info
= NULL
;
8634 gimple_stmt_iterator gsi2
= *gsi
;
8636 for (j
= 0; j
< ncopies
; j
++)
8638 stmt_vec_info new_stmt_info
;
8641 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8643 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8644 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8648 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8650 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8653 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8655 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8657 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8658 prev_stmt_info
= new_stmt_info
;
8663 if (memory_access_type
== VMAT_ELEMENTWISE
8664 || memory_access_type
== VMAT_STRIDED_SLP
)
8666 gimple_stmt_iterator incr_gsi
;
8672 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8673 tree stride_base
, stride_step
, alias_off
;
8674 /* Checked by get_load_store_type. */
8675 unsigned int const_nunits
= nunits
.to_constant ();
8676 unsigned HOST_WIDE_INT cst_offset
= 0;
8678 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8679 gcc_assert (!nested_in_vect_loop
);
8683 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8684 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8688 first_stmt_info
= stmt_info
;
8689 first_dr_info
= dr_info
;
8691 if (slp
&& grouped_load
)
8693 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8694 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8700 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8701 * vect_get_place_in_interleaving_chain (stmt_info
,
8704 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8708 = fold_build_pointer_plus
8709 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8710 size_binop (PLUS_EXPR
,
8711 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
8712 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8713 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...  */
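      /* An illustrative instantiation, assuming VF == 4 (only an example):
	 the new IV j steps by 4*stride and each vector iteration builds

	     vectemp = { array[j], array[j + stride],
			 array[j + 2*stride], array[j + 3*stride] }

	 from four scalar element loads, with RUNNING_OFF bumped by
	 STRIDE_STEP between the element accesses.  */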
8731 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8732 build_int_cst (TREE_TYPE (stride_step
), vf
));
8734 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8736 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8737 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8738 create_iv (stride_base
, ivstep
, NULL
,
8739 loop
, &incr_gsi
, insert_after
,
8741 incr
= gsi_stmt (incr_gsi
);
8742 loop_vinfo
->add_stmt (incr
);
8744 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8746 prev_stmt_info
= NULL
;
8747 running_off
= offvar
;
8748 alias_off
= build_int_cst (ref_type
, 0);
8749 int nloads
= const_nunits
;
8751 tree ltype
= TREE_TYPE (vectype
);
8752 tree lvectype
= vectype
;
8753 auto_vec
<tree
> dr_chain
;
8754 if (memory_access_type
== VMAT_STRIDED_SLP
)
8756 if (group_size
< const_nunits
)
	      /* First check if vec_init optab supports construction from
		 vector elts directly.  */
8760 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
8762 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
8763 && VECTOR_MODE_P (vmode
)
8764 && targetm
.vector_mode_supported_p (vmode
)
8765 && (convert_optab_handler (vec_init_optab
,
8766 TYPE_MODE (vectype
), vmode
)
8767 != CODE_FOR_nothing
))
8769 nloads
= const_nunits
/ group_size
;
8771 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
	      /* Otherwise avoid emitting a constructor of vector elements
		 by performing the loads using an integer type of the same
		 size, constructing a vector of those and then
		 re-interpreting it as the original vector type.
		 This avoids a huge runtime penalty due to the general
		 inability to perform store forwarding from smaller stores
		 to a larger load.  */
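	      /* An illustrative sketch, assuming a group of 2 short
		 elements and a V8HI vectype (only an example): lsize is
		 2 * 16 == 32 bits, so four 32-bit integer loads are
		 emitted, combined into a 4-element integer vector and
		 VIEW_CONVERTed back to V8HI, instead of eight 16-bit
		 element loads.  */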
8783 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
8784 unsigned int lnunits
= const_nunits
/ group_size
;
8785 /* If we can't construct such a vector fall back to
8786 element loads of the original vector type. */
8787 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8788 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
8789 && VECTOR_MODE_P (vmode
)
8790 && targetm
.vector_mode_supported_p (vmode
)
8791 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
8792 != CODE_FOR_nothing
))
8796 ltype
= build_nonstandard_integer_type (lsize
, 1);
8797 lvectype
= build_vector_type (ltype
, nloads
);
8804 lnel
= const_nunits
;
8807 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8809 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8810 else if (nloads
== 1)
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    {
	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF.  */
	      unsigned int const_vf = vf.to_constant ();
	      ncopies = CEIL (group_size * const_vf, const_nunits);
	      dr_chain.create (ncopies);
	    }
	  else
	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8829 unsigned int group_el
= 0;
8830 unsigned HOST_WIDE_INT
8831 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8832 for (j
= 0; j
< ncopies
; j
++)
8835 vec_alloc (v
, nloads
);
8836 stmt_vec_info new_stmt_info
= NULL
;
8837 for (i
= 0; i
< nloads
; i
++)
8839 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8840 group_el
* elsz
+ cst_offset
);
8841 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8842 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8844 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8846 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8848 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8849 gimple_assign_lhs (new_stmt
));
8853 || group_el
== group_size
)
8855 tree newoff
= copy_ssa_name (running_off
);
8856 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8857 running_off
, stride_step
);
8858 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
8860 running_off
= newoff
;
8866 tree vec_inv
= build_constructor (lvectype
, v
);
8867 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
8868 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8869 if (lvectype
!= vectype
)
8872 = gimple_build_assign (make_ssa_name (vectype
),
8874 build1 (VIEW_CONVERT_EXPR
,
8875 vectype
, new_temp
));
8877 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8884 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
8886 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8891 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8893 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8894 prev_stmt_info
= new_stmt_info
;
8900 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8901 slp_node_instance
, false, &n_perms
);
8906 if (memory_access_type
== VMAT_GATHER_SCATTER
8907 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
8908 grouped_load
= false;
8912 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8913 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8914 /* For SLP vectorization we directly vectorize a subchain
8915 without permutation. */
8916 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8917 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8918 /* For BB vectorization always use the first stmt to base
8919 the data ref pointer on. */
8921 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (first_stmt_info)
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
8938 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8941 /* VEC_NUM is the number of vect stmts to be created for this group. */
8944 grouped_load
= false;
	  /* If an SLP permutation is from N elements to N elements,
	     and if one vector holds a whole number of N, we can load
	     the inputs to the permutation in the same way as an
	     unpermuted sequence.  In other cases we need to load the
	     whole group, not only the number of vector stmts the
	     permutation result fits in.  */
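	  /* An illustrative sketch (the sizes are only an example): with a
	     group of size 2 and V4SI vectors, one vector holds two whole
	     groups, so the permutation inputs are loaded as an unpermuted
	     contiguous sequence.  With a group of size 3 the whole group
	     must be loaded instead, using vec_num = CEIL (group_size * vf,
	     nunits) vectors, and group_gap_adj accounts for the excess
	     elements.  */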
8952 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
8953 || !multiple_p (nunits
, group_size
)))
8955 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8956 variable VF; see vect_transform_slp_perm_load. */
8957 unsigned int const_vf
= vf
.to_constant ();
8958 unsigned int const_nunits
= nunits
.to_constant ();
8959 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
8960 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
8964 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8966 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
8970 vec_num
= group_size
;
8972 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8976 first_stmt_info
= stmt_info
;
8977 first_dr_info
= dr_info
;
8978 group_size
= vec_num
= 1;
8980 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8983 alignment_support_scheme
8984 = vect_supportable_dr_alignment (first_dr_info
, false);
8985 gcc_assert (alignment_support_scheme
);
8986 vec_loop_masks
*loop_masks
8987 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8988 ? &LOOP_VINFO_MASKS (loop_vinfo
)
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
8993 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8996 || alignment_support_scheme
== dr_aligned
8997 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */
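  /* An illustrative sketch, assuming 16-byte vectors and a pointer p
     misaligned by 4 bytes (only an example):

	 msq = *(p & -16)	   covers bytes p-4  .. p+11
	 lsq = *((p + 15) & -16)   covers bytes p+12 .. p+27

     and realign_load (msq, lsq, realignment_token) extracts the 16 bytes
     starting at p from the concatenation of the two aligned loads.  */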
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
9102 if (nested_in_vect_loop
9103 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9104 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9106 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9107 compute_in_loop
= true;
9110 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9111 || alignment_support_scheme
== dr_explicit_realign
)
9112 && !compute_in_loop
)
9114 msq
= vect_setup_realignment (first_stmt_info
, gsi
, &realignment_token
,
9115 alignment_support_scheme
, NULL_TREE
,
9117 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9119 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9120 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9127 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9128 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9131 tree vec_offset
= NULL_TREE
;
9132 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9134 aggr_type
= NULL_TREE
;
9137 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9139 aggr_type
= elem_type
;
9140 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9141 &bump
, &vec_offset
);
9145 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9146 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9148 aggr_type
= vectype
;
9149 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
9150 memory_access_type
);
9153 tree vec_mask
= NULL_TREE
;
9154 prev_stmt_info
= NULL
;
9155 poly_uint64 group_elt
= 0;
9156 for (j
= 0; j
< ncopies
; j
++)
9158 stmt_vec_info new_stmt_info
= NULL
;
9159 /* 1. Create the vector or array pointer update chain. */
9162 bool simd_lane_access_p
9163 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9164 if (simd_lane_access_p
9165 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9166 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9167 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
9168 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9169 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9170 get_alias_set (TREE_TYPE (ref_type
)))
9171 && (alignment_support_scheme
== dr_aligned
9172 || alignment_support_scheme
== dr_unaligned_supported
))
9174 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9175 dataref_offset
= build_int_cst (ref_type
, 0);
9177 else if (first_stmt_info_for_drptr
9178 && first_stmt_info
!= first_stmt_info_for_drptr
)
9181 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
9182 aggr_type
, at_loop
, offset
, &dummy
,
9183 gsi
, &ptr_incr
, simd_lane_access_p
,
9185 /* Adjust the pointer by the difference to first_stmt. */
9186 data_reference_p ptrdr
9187 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9189 = fold_convert (sizetype
,
9190 size_binop (MINUS_EXPR
,
9191 DR_INIT (first_dr_info
->dr
),
9193 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9196 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9197 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
9198 &dataref_ptr
, &vec_offset
);
9201 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
9202 offset
, &dummy
, gsi
, &ptr_incr
,
9209 auto_vec
<tree
> ops (1);
9210 auto_vec
<vec
<tree
> > vec_defs (1);
9211 ops
.quick_push (mask
);
9212 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9213 vec_mask
= vec_defs
[0][0];
9216 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
9223 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9225 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9226 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9228 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9231 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9234 if (grouped_load
|| slp_perm
)
9235 dr_chain
.create (vec_num
);
9237 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9241 vec_array
= create_vector_array (vectype
, vec_num
);
9243 tree final_mask
= NULL_TREE
;
9245 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9248 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9255 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9257 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9258 tree alias_ptr
= build_int_cst (ref_type
, align
);
9259 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9260 dataref_ptr
, alias_ptr
,
9266 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9267 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9268 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9270 gimple_call_set_lhs (call
, vec_array
);
9271 gimple_call_set_nothrow (call
, true);
9272 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
9274 /* Extract each vector into an SSA_NAME. */
9275 for (i
= 0; i
< vec_num
; i
++)
9277 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
9279 dr_chain
.quick_push (new_temp
);
9282 /* Record the mapping between SSA_NAMEs and statements. */
9283 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
9285 /* Record that VEC_ARRAY is now dead. */
9286 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
9290 for (i
= 0; i
< vec_num
; i
++)
9292 tree final_mask
= NULL_TREE
;
9294 && memory_access_type
!= VMAT_INVARIANT
)
9295 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9297 vectype
, vec_num
* j
+ i
);
9299 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9303 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9306 /* 2. Create the vector-load in the loop. */
9307 gimple
*new_stmt
= NULL
;
9308 switch (alignment_support_scheme
)
9311 case dr_unaligned_supported
:
9313 unsigned int misalign
;
9314 unsigned HOST_WIDE_INT align
;
9316 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9318 tree scale
= size_int (gs_info
.scale
);
9321 call
= gimple_build_call_internal
9322 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
9323 vec_offset
, scale
, final_mask
);
9325 call
= gimple_build_call_internal
9326 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
9328 gimple_call_set_nothrow (call
, true);
9330 data_ref
= NULL_TREE
;
9335 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9336 if (alignment_support_scheme
== dr_aligned
)
9338 gcc_assert (aligned_access_p (first_dr_info
));
9341 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9343 align
= dr_alignment
9344 (vect_dr_behavior (first_dr_info
));
9348 misalign
= DR_MISALIGNMENT (first_dr_info
);
9349 if (dataref_offset
== NULL_TREE
9350 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9351 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9356 align
= least_bit_hwi (misalign
| align
);
9357 tree ptr
= build_int_cst (ref_type
, align
);
9359 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9362 gimple_call_set_nothrow (call
, true);
9364 data_ref
= NULL_TREE
;
9368 tree ltype
= vectype
;
		  /* If there's no peeling for gaps but we have a gap
		     with slp loads then load the lower half of the
		     vector only.  See get_group_load_store_type for
		     when we apply this optimization.  */
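		  /* An illustrative sketch (the sizes are only an example):
		     for a group of size 4 with a gap of 2 and a 4-element
		     vectype, only group_size - gap == 2 elements exist per
		     group, so a 2-element vector is loaded and the
		     CONSTRUCTOR below pads the upper half with zeros
		     instead of reading the gap elements.  */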
9375 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9376 && DR_GROUP_GAP (first_stmt_info
) != 0
9377 && known_eq (nunits
,
9379 - DR_GROUP_GAP (first_stmt_info
)) * 2)
9380 && known_eq (nunits
, group_size
))
9381 ltype
= build_vector_type (TREE_TYPE (vectype
),
9384 (first_stmt_info
)));
9386 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
,
9389 : build_int_cst (ref_type
, 0));
9390 if (alignment_support_scheme
== dr_aligned
)
9392 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9393 TREE_TYPE (data_ref
)
9394 = build_aligned_type (TREE_TYPE (data_ref
),
9395 align
* BITS_PER_UNIT
);
9397 TREE_TYPE (data_ref
)
9398 = build_aligned_type (TREE_TYPE (data_ref
),
9399 TYPE_ALIGN (elem_type
));
9400 if (ltype
!= vectype
)
9402 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9403 tree tem
= make_ssa_name (ltype
);
9404 new_stmt
= gimple_build_assign (tem
, data_ref
);
9405 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9407 vec
<constructor_elt
, va_gc
> *v
;
9409 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9410 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9411 build_zero_cst (ltype
));
9413 = gimple_build_assign (vec_dest
,
9420 case dr_explicit_realign
:
9424 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9426 if (compute_in_loop
)
9427 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
9429 dr_explicit_realign
,
9432 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9433 ptr
= copy_ssa_name (dataref_ptr
);
9435 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9436 // For explicit realign the target alignment should be
9437 // known at compile time.
9438 unsigned HOST_WIDE_INT align
=
9439 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9440 new_stmt
= gimple_build_assign
9441 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9443 (TREE_TYPE (dataref_ptr
),
9444 -(HOST_WIDE_INT
) align
));
9445 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9447 = build2 (MEM_REF
, vectype
, ptr
,
9448 build_int_cst (ref_type
, 0));
9449 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9450 vec_dest
= vect_create_destination_var (scalar_dest
,
9452 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9453 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9454 gimple_assign_set_lhs (new_stmt
, new_temp
);
9455 gimple_set_vdef (new_stmt
, gimple_vdef (stmt_info
->stmt
));
9456 gimple_set_vuse (new_stmt
, gimple_vuse (stmt_info
->stmt
));
9457 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9460 bump
= size_binop (MULT_EXPR
, vs
,
9461 TYPE_SIZE_UNIT (elem_type
));
9462 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9463 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
9465 new_stmt
= gimple_build_assign
9466 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9468 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9469 ptr
= copy_ssa_name (ptr
, new_stmt
);
9470 gimple_assign_set_lhs (new_stmt
, ptr
);
9471 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9473 = build2 (MEM_REF
, vectype
, ptr
,
9474 build_int_cst (ref_type
, 0));
9477 case dr_explicit_realign_optimized
:
9479 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9480 new_temp
= copy_ssa_name (dataref_ptr
);
9482 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9483 // We should only be doing this if we know the target
9484 // alignment at compile time.
9485 unsigned HOST_WIDE_INT align
=
9486 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9487 new_stmt
= gimple_build_assign
9488 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9489 build_int_cst (TREE_TYPE (dataref_ptr
),
9490 -(HOST_WIDE_INT
) align
));
9491 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9493 = build2 (MEM_REF
, vectype
, new_temp
,
9494 build_int_cst (ref_type
, 0));
9500 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9501 /* DATA_REF is null if we've already built the statement. */
9504 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9505 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9507 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9508 gimple_set_lhs (new_stmt
, new_temp
);
9510 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
	  /* 3. Handle explicit realignment if necessary/supported.

	     vec_dest = realign_load (msq, lsq, realignment_token)  */
9515 if (alignment_support_scheme
== dr_explicit_realign_optimized
9516 || alignment_support_scheme
== dr_explicit_realign
)
9518 lsq
= gimple_assign_lhs (new_stmt
);
9519 if (!realignment_token
)
9520 realignment_token
= dataref_ptr
;
9521 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9522 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9523 msq
, lsq
, realignment_token
);
9524 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9525 gimple_assign_set_lhs (new_stmt
, new_temp
);
9527 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9529 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9532 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9533 add_phi_arg (phi
, lsq
,
9534 loop_latch_edge (containing_loop
),
9540 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9542 tree perm_mask
= perm_mask_for_reverse (vectype
);
9543 new_temp
= permute_vec_elements (new_temp
, new_temp
,
9544 perm_mask
, stmt_info
, gsi
);
9545 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9548 /* Collect vector loads and later create their permutation in
9549 vect_transform_grouped_load (). */
9550 if (grouped_load
|| slp_perm
)
9551 dr_chain
.quick_push (new_temp
);
9553 /* Store vector loads in the corresponding SLP_NODE. */
9554 if (slp
&& !slp_perm
)
9555 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9557 /* With SLP permutation we load the gaps as well, without
9558 we need to skip the gaps after we manage to fully load
9559 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9560 group_elt
+= nunits
;
9561 if (maybe_ne (group_gap_adj
, 0U)
9563 && known_eq (group_elt
, group_size
- group_gap_adj
))
9565 poly_wide_int bump_val
9566 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9568 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9569 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9574 /* Bump the vector pointer to account for a gap or for excess
9575 elements loaded for a permuted SLP load. */
9576 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9578 poly_wide_int bump_val
9579 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9581 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9582 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9587 if (slp
&& !slp_perm
)
9593 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9594 slp_node_instance
, false,
9597 dr_chain
.release ();
9605 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9606 vect_transform_grouped_load (stmt_info
, dr_chain
,
9608 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9613 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9615 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9616 prev_stmt_info
= new_stmt_info
;
9619 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vec_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo,
		     tree *comp_vectype, enum vect_def_type *dts,
		     tree vectype)
{
9644 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9647 if (TREE_CODE (cond
) == SSA_NAME
9648 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9650 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9652 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9657 if (!COMPARISON_CLASS_P (cond
))
9660 lhs
= TREE_OPERAND (cond
, 0);
9661 rhs
= TREE_OPERAND (cond
, 1);
9663 if (TREE_CODE (lhs
) == SSA_NAME
)
9665 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
9668 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9669 || TREE_CODE (lhs
) == FIXED_CST
)
9670 dts
[0] = vect_constant_def
;
9674 if (TREE_CODE (rhs
) == SSA_NAME
)
9676 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
9679 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9680 || TREE_CODE (rhs
) == FIXED_CST
)
9681 dts
[1] = vect_constant_def
;
9685 if (vectype1
&& vectype2
9686 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9687 TYPE_VECTOR_SUBPARTS (vectype2
)))
9690 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9691 /* Invariant comparison. */
9692 if (! *comp_vectype
)
9694 tree scalar_type
= TREE_TYPE (lhs
);
9695 /* If we can widen the comparison to match vectype do so. */
9696 if (INTEGRAL_TYPE_P (scalar_type
)
9698 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
9699 TYPE_SIZE (TREE_TYPE (vectype
))))
9700 scalar_type
= build_nonstandard_integer_type
9701 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
9702 TYPE_UNSIGNED (scalar_type
));
9703 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
/* vectorizable_condition.

   Check if STMT_INFO is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
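/* An illustrative sketch (the names are only an example): the scalar
   statement

	x = a < b ? c : d;

   is vectorized as

	vx = VEC_COND_EXPR <va < vb, vc, vd>;

   with the comparison carried out in COMP_VECTYPE and the selected values
   in the statement's vectype.  */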
bool
vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			stmt_vec_info *vec_stmt, bool for_reduction,
			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
9725 vec_info
*vinfo
= stmt_info
->vinfo
;
9726 tree scalar_dest
= NULL_TREE
;
9727 tree vec_dest
= NULL_TREE
;
9728 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
9729 tree then_clause
, else_clause
;
9730 tree comp_vectype
= NULL_TREE
;
9731 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
9732 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
9735 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9736 enum vect_def_type dts
[4]
9737 = {vect_unknown_def_type
, vect_unknown_def_type
,
9738 vect_unknown_def_type
, vect_unknown_def_type
};
9741 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9742 stmt_vec_info prev_stmt_info
= NULL
;
9744 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9745 vec
<tree
> vec_oprnds0
= vNULL
;
9746 vec
<tree
> vec_oprnds1
= vNULL
;
9747 vec
<tree
> vec_oprnds2
= vNULL
;
9748 vec
<tree
> vec_oprnds3
= vNULL
;
9750 bool masked
= false;
9752 if (for_reduction
&& STMT_SLP_TYPE (stmt_info
))
9755 vect_reduction_type reduction_type
9756 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
9757 if (reduction_type
== TREE_CODE_REDUCTION
)
9759 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9762 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9763 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9767 /* FORNOW: not yet supported. */
9768 if (STMT_VINFO_LIVE_P (stmt_info
))
9770 if (dump_enabled_p ())
9771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9772 "value used after loop.\n");
9777 /* Is vectorizable conditional operation? */
9778 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9782 code
= gimple_assign_rhs_code (stmt
);
9784 if (code
!= COND_EXPR
)
9787 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9788 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9793 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9795 gcc_assert (ncopies
>= 1);
9796 if (for_reduction
&& ncopies
> 1)
9797 return false; /* FORNOW */
9799 cond_expr
= gimple_assign_rhs1 (stmt
);
9800 then_clause
= gimple_assign_rhs2 (stmt
);
9801 else_clause
= gimple_assign_rhs3 (stmt
);
9803 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
9804 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
9808 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
9810 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
9813 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
9816 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
9819 masked
= !COMPARISON_CLASS_P (cond_expr
);
9820 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
9822 if (vec_cmp_type
== NULL_TREE
)
9825 cond_code
= TREE_CODE (cond_expr
);
9828 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
9829 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
9832 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
	  /* Boolean values may have another representation in vectors
	     and therefore we prefer bit operations over comparison for
	     them (which also works for scalar masks).  We store opcodes
	     to use in bitop1 and bitop2.  Statement is vectorized as
	     BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	     depending on bitop1 and bitop2 arity.  */
9843 bitop1
= BIT_NOT_EXPR
;
9844 bitop2
= BIT_AND_EXPR
;
9847 bitop1
= BIT_NOT_EXPR
;
9848 bitop2
= BIT_IOR_EXPR
;
9851 bitop1
= BIT_NOT_EXPR
;
9852 bitop2
= BIT_AND_EXPR
;
9853 std::swap (cond_expr0
, cond_expr1
);
9856 bitop1
= BIT_NOT_EXPR
;
9857 bitop2
= BIT_IOR_EXPR
;
9858 std::swap (cond_expr0
, cond_expr1
);
9861 bitop1
= BIT_XOR_EXPR
;
9864 bitop1
= BIT_XOR_EXPR
;
9865 bitop2
= BIT_NOT_EXPR
;
9870 cond_code
= SSA_NAME
;
9875 if (bitop1
!= NOP_EXPR
)
9877 machine_mode mode
= TYPE_MODE (comp_vectype
);
9880 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
9881 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9884 if (bitop2
!= NOP_EXPR
)
9886 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
9888 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9892 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
9895 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
9896 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
9907 vec_oprnds0
.create (1);
9908 vec_oprnds1
.create (1);
9909 vec_oprnds2
.create (1);
9910 vec_oprnds3
.create (1);
9914 scalar_dest
= gimple_assign_lhs (stmt
);
9915 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
9916 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9918 /* Handle cond expr. */
9919 for (j
= 0; j
< ncopies
; j
++)
9921 stmt_vec_info new_stmt_info
= NULL
;
9926 auto_vec
<tree
, 4> ops
;
9927 auto_vec
<vec
<tree
>, 4> vec_defs
;
9930 ops
.safe_push (cond_expr
);
9933 ops
.safe_push (cond_expr0
);
9934 ops
.safe_push (cond_expr1
);
9936 ops
.safe_push (then_clause
);
9937 ops
.safe_push (else_clause
);
9938 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9939 vec_oprnds3
= vec_defs
.pop ();
9940 vec_oprnds2
= vec_defs
.pop ();
9942 vec_oprnds1
= vec_defs
.pop ();
9943 vec_oprnds0
= vec_defs
.pop ();
9950 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
9956 = vect_get_vec_def_for_operand (cond_expr0
,
9957 stmt_info
, comp_vectype
);
9959 = vect_get_vec_def_for_operand (cond_expr1
,
9960 stmt_info
, comp_vectype
);
9962 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
9964 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
9965 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
9972 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
9975 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
9977 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
9978 vec_oprnds2
.pop ());
9979 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
9980 vec_oprnds3
.pop ());
9985 vec_oprnds0
.quick_push (vec_cond_lhs
);
9987 vec_oprnds1
.quick_push (vec_cond_rhs
);
9988 vec_oprnds2
.quick_push (vec_then_clause
);
9989 vec_oprnds3
.quick_push (vec_else_clause
);
9992 /* Arguments are ready. Create the new vector stmt. */
9993 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
9995 vec_then_clause
= vec_oprnds2
[i
];
9996 vec_else_clause
= vec_oprnds3
[i
];
9999 vec_compare
= vec_cond_lhs
;
10002 vec_cond_rhs
= vec_oprnds1
[i
];
10003 if (bitop1
== NOP_EXPR
)
10004 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10005 vec_cond_lhs
, vec_cond_rhs
);
10008 new_temp
= make_ssa_name (vec_cmp_type
);
10010 if (bitop1
== BIT_NOT_EXPR
)
10011 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10015 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10017 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10018 if (bitop2
== NOP_EXPR
)
10019 vec_compare
= new_temp
;
10020 else if (bitop2
== BIT_NOT_EXPR
)
10022 /* Instead of doing ~x ? y : z do x ? z : y. */
10023 vec_compare
= new_temp
;
10024 std::swap (vec_then_clause
, vec_else_clause
);
10028 vec_compare
= make_ssa_name (vec_cmp_type
);
10030 = gimple_build_assign (vec_compare
, bitop2
,
10031 vec_cond_lhs
, new_temp
);
10032 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10036 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10038 if (!is_gimple_val (vec_compare
))
10040 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10041 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10043 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10044 vec_compare
= vec_compare_name
;
10046 gcall
*new_stmt
= gimple_build_call_internal
10047 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10049 gimple_call_set_lhs (new_stmt
, scalar_dest
);
10050 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
10051 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
10052 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
10055 /* In this case we're moving the definition to later in the
10056 block. That doesn't matter because the only uses of the
10057 lhs are in phi statements. */
10058 gimple_stmt_iterator old_gsi
10059 = gsi_for_stmt (stmt_info
->stmt
);
10060 gsi_remove (&old_gsi
, true);
10062 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10067 new_temp
= make_ssa_name (vec_dest
);
10069 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10070 vec_then_clause
, vec_else_clause
);
10072 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10075 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10082 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10084 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10086 prev_stmt_info
= new_stmt_info
;
10089 vec_oprnds0
.release ();
10090 vec_oprnds1
.release ();
10091 vec_oprnds2
.release ();
10092 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt,
			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree lhs, rhs1, rhs2;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  poly_uint64 nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
	return false;
    }
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  bool swap_p = false;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  swap_p = true;
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  swap_p = true;
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  if (!vec_stmt)
    {
      if (bitop1 == NOP_EXPR)
	{
	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
	    return false;
	}
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}

      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 2> ops;
	      auto_vec<vec<tree>, 2> vec_defs;

	      ops.safe_push (rhs1);
	      ops.safe_push (rhs2);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	      if (swap_p)
		std::swap (vec_oprnds0, vec_oprnds1);
	    }
	  else
	    {
	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
						       vectype);
	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
						       vectype);
	    }
	}
      else
	{
	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
						     vec_oprnds0.pop ());
	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
						     vec_oprnds1.pop ());
	}

      if (!slp_node)
	{
	  if (swap_p && j == 0)
	    std::swap (vec_rhs1, vec_rhs2);
	  vec_oprnds0.quick_push (vec_rhs1);
	  vec_oprnds1.quick_push (vec_rhs2);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
	{
	  vec_rhs2 = vec_oprnds1[i];

	  new_temp = make_ssa_name (mask);
	  if (bitop1 == NOP_EXPR)
	    {
	      gassign *new_stmt = gimple_build_assign (new_temp, code,
						       vec_rhs1, vec_rhs2);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  else
	    {
	      gassign *new_stmt;
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	      else
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
						vec_rhs2);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      if (bitop2 != NOP_EXPR)
		{
		  tree res = make_ssa_name (mask);
		  if (bitop2 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
		  else
		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						    new_temp);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		}
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

static bool
can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, stmt_vec_info *vec_stmt,
			  stmt_vector_for_cost *cost_vec)
{
  if (slp_node)
    {
      stmt_vec_info slp_stmt_info;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
	{
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
					       vec_stmt, cost_vec))
	    return false;
	}
    }
  else if (STMT_VINFO_LIVE_P (stmt_info)
	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
					    vec_stmt, cost_vec))
    return false;

  return true;
}
/* Make sure the statement is vectorizable.  */

opt_result
vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
		   slp_tree node, slp_instance node_instance,
		   stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
		     stmt_info->stmt);

  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " stmt has volatile operands: %G\n",
				   stmt_info->stmt);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info pattern_def_stmt_info
	    = vinfo->lookup_stmt (gsi_stmt (si));
	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "==> examining pattern def statement: %G",
				 pattern_def_stmt_info->stmt);

	      opt_result res
		= vect_analyze_stmt (pattern_def_stmt_info,
				     need_to_vectorize, node, node_instance,
				     cost_vec);
	      if (!res)
		return res;
	    }
	}
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt_info
	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt_info = pattern_stmt_info;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "==> examining pattern statement: %G",
			     stmt_info->stmt);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return opt_result::success ();
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt_info
	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "==> examining pattern statement: %G",
			 pattern_stmt_info->stmt);

      opt_result res
	= vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
			     node_instance, cost_vec);
      if (!res)
	return res;
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      tree type = gimple_expr_type (stmt_info->stmt);
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "handled only by SLP analysis\n");
      return opt_result::success ();
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
					   cost_vec)
	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
				cost_vec)
	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_reduction (stmt_info, NULL, NULL, node,
				     node_instance, cost_vec)
	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
				     cost_vec)
	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
				      cost_vec));
  else if (bb_vinfo)
    ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
					   cost_vec)
	  || vectorizable_conversion (stmt_info, NULL, NULL, node,
				      cost_vec)
	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (stmt_info, NULL, NULL, node,
				      cost_vec)
	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
				cost_vec)
	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
				     cost_vec)
	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
				      cost_vec));

  if (!ok)
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " relevant stmt not supported: %G",
				   stmt_info->stmt);

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " live stmt not supported: %G",
				   stmt_info->stmt);

  return opt_result::success ();
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */

bool
vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		     slp_tree slp_node, slp_instance slp_node_instance)
{
  vec_info *vinfo = stmt_info->vinfo;
  bool is_store = false;
  stmt_vec_info vec_stmt = NULL;
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);

  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
		   && nested_in_vect_loop_p
			(LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
			 stmt_info));

  gimple *stmt = stmt_info->stmt;
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
				      NULL);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
				     NULL);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
				     NULL);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
				      NULL);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
				slp_node_instance, NULL);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and there vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
				     slp_node, NULL);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
				      slp_node, NULL);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      stmt = gsi_stmt (*gsi);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
					   slp_node, NULL);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
				     slp_node_instance, NULL);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && nested_p
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_get_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	  {
	    stmt_vec_info exit_phi_info
	      = vinfo->lookup_stmt (USE_STMT (use_p));
	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
	  }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
				       NULL);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (stmt_vec_info first_stmt_info)
{
  vec_info *vinfo = first_stmt_info->vinfo;
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their types precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
10897 /* Function get_vectype_for_scalar_type.
10899 Returns the vector type corresponding to SCALAR_TYPE as supported
10903 get_vectype_for_scalar_type (tree scalar_type
)
10906 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
10907 current_vector_size
);
10909 && known_eq (current_vector_size
, 0U))
10910 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
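/* For illustration only (a hypothetical caller, mirroring the uses earlier
   in this file): a typical analysis-time call is

       enum vect_def_type dt;
       tree op_vectype = NULL_TREE;
       if (!vect_is_simple_use (op, vinfo, &dt, &op_vectype))
	 return false;

   after which DT says whether OP is a constant, an external (invariant)
   value, or defined inside the region, and OP_VECTYPE (in the overload
   further below) carries the vector type of an internal definition.  */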
bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	    case GIMPLE_CALL:
	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	      break;
	    default:
	      *dt = vect_unknown_def_type;
	      break;
	    }
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
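/* For illustration (a sketch only, not tied to a particular target):
   widening a vector of chars to ints goes char->short->int, each step
   unpacking one input vector into a LO and a HI result, conceptually

       short_lo = VEC_UNPACK_LO_EXPR <char_vec>
       short_hi = VEC_UNPACK_HI_EXPR <char_vec>
       int_0    = VEC_UNPACK_LO_EXPR <short_lo>   ... and so on,

   so on success the function would return with *MULTI_STEP_CVT == 1 and
   the intermediate short vector type pushed onto INTERM_TYPES.  */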
bool
supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
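/* For illustration (a sketch only): each narrowing step packs two input
   vectors into one narrower output vector, so int->char conceptually goes

       short_vec = VEC_PACK_TRUNC_EXPR <int_vec0, int_vec1>
       char_vec  = VEC_PACK_TRUNC_EXPR <short_vec0, short_vec1>

   giving *MULTI_STEP_CVT == 1 with the intermediate short vector type
   recorded in INTERM_TYPES.  */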
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - boolean_type_node if the statement is a boolean operation whose
       vector type can only be determined once all the other vector types
       are known; and
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out)
{
  gimple *stmt = stmt_info->stmt;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
  else
    {
      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
	 For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
	  && is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
	{
	  *stmt_vectype_out = boolean_type_node;

	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    scalar_type = TREE_TYPE (rhs1);
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "pure bool operation.\n");
	      return opt_result::success ();
	    }
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "get vectype for scalar type: %T\n", scalar_type);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (!*stmt_vectype_out)
	*stmt_vectype_out = vectype;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
  else
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
	{
	  HOST_WIDE_INT dummy;
	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
						       &dummy, &dummy);
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "get vectype for scalar type: %T\n", scalar_type);
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
    }
  if (!nunits_vectype)
    return opt_result::failure_at (stmt,
				   "not vectorized: unsupported data-type %T\n",
				   scalar_type);

  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    return opt_result::failure_at (stmt,
				   "not vectorized: different sized vector "
				   "types in statement, %T and %T\n",
				   vectype, nunits_vectype);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
		       nunits_vectype);

      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
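}

/* For illustration of the common case (a sketch only): for a statement
   like "b_1 = x_2 < y_3" where x and y are ints, the scalar result is a
   boolean, so *STMT_VECTYPE_OUT is set to boolean_type_node (the real mask
   type is decided later, once all other vector types are known), while the
   number of units is taken from the comparison operands, making
   *NUNITS_VECTYPE_OUT a "vector(N) int" style type.  */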
/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

opt_tree
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
{
  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
    {
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

      if (!mask_type)
	return opt_tree::failure_at (stmt,
				     "not vectorized: unsupported mask\n");
    }
  else
    {
      tree rhs;
      ssa_op_iter iter;
      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	{
	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: can't compute mask"
					 " type for statement, %G", stmt);

	  /* No vectype probably means external definition.
	     Allow it in case there is another operand which
	     allows to determine mask type.  */
	  if (!vectype)
	    continue;

	  if (!mask_type)
	    mask_type = vectype;
	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
			     TYPE_VECTOR_SUBPARTS (vectype)))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: different sized mask"
					 " types in statement, %T and %T\n",
					 mask_type, vectype);
	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		   != VECTOR_BOOLEAN_TYPE_P (vectype))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: mixed mask and "
					 "nonmask vector types in statement, "
					 "%T and %T\n",
					 mask_type, vectype);
	}

      /* We may compare boolean value loaded as vector of integers.
	 Fix mask_type in such case.  */
      if (mask_type
	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	  && gimple_code (stmt) == GIMPLE_ASSIGN
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	mask_type = build_same_sized_truth_vector_type (mask_type);
    }

  /* No mask_type should mean loop invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
  if (!mask_type)
    return opt_tree::failure_at (stmt,
				 "not vectorized: can't compute mask type "
				 "for statement: %G", stmt);

  return opt_tree::success (mask_type);
}