1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
62 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
64 return STMT_VINFO_VECTYPE (stmt_info
);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
70 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
72 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
73 basic_block bb
= gimple_bb (stmt
);
74 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
80 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
82 return (bb
->loop_father
== loop
->inner
);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
90 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
91 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
92 int misalign
, enum vect_cost_model_location where
)
94 if ((kind
== vector_load
|| kind
== unaligned_load
)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
96 kind
= vector_gather_load
;
97 if ((kind
== vector_store
|| kind
== unaligned_store
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_scatter_store
;
101 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
102 body_cost_vec
->safe_push (si
);
104 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
106 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
112 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
114 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
124 read_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
125 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
127 tree vect_type
, vect
, vect_name
, array_ref
;
130 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
131 vect_type
= TREE_TYPE (TREE_TYPE (array
));
132 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
133 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
134 build_int_cst (size_type_node
, n
),
135 NULL_TREE
, NULL_TREE
);
137 new_stmt
= gimple_build_assign (vect
, array_ref
);
138 vect_name
= make_ssa_name (vect
, new_stmt
);
139 gimple_assign_set_lhs (new_stmt
, vect_name
);
140 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
150 write_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
151 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
156 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
157 build_int_cst (size_type_node
, n
),
158 NULL_TREE
, NULL_TREE
);
160 new_stmt
= gimple_build_assign (array_ref
, vect
);
161 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
169 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
173 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
183 vect_clobber_variable (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
186 tree clobber
= build_clobber (TREE_TYPE (var
));
187 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
188 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
198 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
199 enum vect_relevant relevant
, bool live_p
)
201 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
202 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE
, vect_location
,
206 "mark relevant %d, live %d: %G", relevant
, live_p
,
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern, in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE
, vect_location
,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info
= stmt_info
;
225 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
227 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
228 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
231 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
232 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
233 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
235 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE
, vect_location
,
240 "already marked relevant/live.\n");
244 worklist
->safe_push (stmt_info
);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
254 loop_vec_info loop_vinfo
)
259 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
263 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
265 enum vect_def_type dt
= vect_uninitialized_def
;
267 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
271 "use not simple.\n");
275 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - control stmts in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
294 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
295 enum vect_relevant
*relevant
, bool *live_p
)
297 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
299 imm_use_iterator imm_iter
;
303 *relevant
= vect_unused_in_scope
;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info
->stmt
)
308 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
309 *relevant
= vect_used_in_scope
;
311 /* changing memory. */
312 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
313 if (gimple_vdef (stmt_info
->stmt
)
314 && !gimple_clobber_p (stmt_info
->stmt
))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE
, vect_location
,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant
= vect_used_in_scope
;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
325 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
327 basic_block bb
= gimple_bb (USE_STMT (use_p
));
328 if (!flow_bb_inside_loop_p (loop
, bb
))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE
, vect_location
,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p
)))
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
340 gcc_assert (bb
== single_exit (loop
)->dest
);
347 if (*live_p
&& *relevant
== vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE
, vect_location
,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant
= vect_used_only_live
;
356 return (*live_p
|| *relevant
);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
366 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info
))
376 /* STMT has a data_ref. FORNOW this means that its of one of
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
390 if (!assign
|| !gimple_assign_copy_p (assign
))
392 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
393 if (call
&& gimple_call_internal_p (call
))
395 internal_fn ifn
= gimple_call_internal_fn (call
);
396 int mask_index
= internal_fn_mask_index (ifn
);
398 && use
== gimple_call_arg (call
, mask_index
))
400 int stored_value_index
= internal_fn_stored_value_index (ifn
);
401 if (stored_value_index
>= 0
402 && use
== gimple_call_arg (call
, stored_value_index
))
404 if (internal_gather_scatter_fn_p (ifn
)
405 && use
== gimple_call_arg (call
, 1))
411 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
413 operand
= gimple_assign_rhs1 (assign
);
414 if (TREE_CODE (operand
) != SSA_NAME
)
425 Function process_use.
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT cause it had already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
452 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
453 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
456 stmt_vec_info dstmt_vinfo
;
457 basic_block bb
, def_bb
;
458 enum vect_def_type dt
;
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
463 return opt_result::success ();
465 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
466 return opt_result::failure_at (stmt_vinfo
->stmt
,
468 " unsupported use in stmt.\n");
471 return opt_result::success ();
473 def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb
= gimple_bb (stmt_vinfo
->stmt
);
481 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
483 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
485 && bb
->loop_father
== def_bb
->loop_father
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
610 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
780 } /* while worklist */
782 return opt_result::success ();
785 /* Compute the prologue cost for invariant or constant operands. */
788 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
789 unsigned opno
, enum vect_def_type dt
,
790 stmt_vector_for_cost
*cost_vec
)
792 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
793 tree op
= gimple_op (stmt
, opno
);
794 unsigned prologue_cost
= 0;
796 /* Without looking at the actual initializer a vector of
797 constants can be implemented as load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype
= get_vectype_for_scalar_type (TREE_TYPE (op
));
800 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
801 unsigned num_vects_to_check
;
802 unsigned HOST_WIDE_INT const_nunits
;
804 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
805 && ! multiple_p (const_nunits
, group_size
))
807 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
808 nelt_limit
= const_nunits
;
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check
= 1;
816 nelt_limit
= group_size
;
818 tree elt
= NULL_TREE
;
820 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
822 unsigned si
= j
% group_size
;
824 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
825 /* ??? We're just tracking whether all operands of a single
826 vector initializer are the same, ideally we'd check if
827 we emitted the same one already. */
828 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
832 if (nelt
== nelt_limit
)
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost
+= record_stmt_cost
838 dt
== vect_external_def
839 ? (elt
? scalar_to_vec
: vec_construct
)
841 stmt_info
, 0, vect_prologue
);
846 return prologue_cost
;
849 /* Function vect_model_simple_cost.
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
856 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
857 enum vect_def_type
*dt
,
860 stmt_vector_for_cost
*cost_vec
)
862 int inside_cost
= 0, prologue_cost
= 0;
864 gcc_assert (cost_vec
!= NULL
);
866 /* ??? Somehow we need to fix this at the callers. */
868 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
875 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
876 tree lhs
= gimple_get_lhs (stmt
);
877 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
879 tree op
= gimple_op (stmt
, i
);
880 enum vect_def_type dt
;
881 if (!op
|| op
== lhs
)
883 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
884 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
885 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
890 /* Cost the "broadcast" of a scalar operand in to a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
893 for (int i
= 0; i
< ndts
; i
++)
894 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
895 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
896 stmt_info
, 0, vect_prologue
);
898 /* Adjust for two-operator SLP nodes. */
899 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
902 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
903 stmt_info
, 0, vect_body
);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vector_stmt
,
908 stmt_info
, 0, vect_body
);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE
, vect_location
,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
924 enum vect_def_type
*dt
, int pwr
,
925 stmt_vector_for_cost
*cost_vec
)
928 int inside_cost
= 0, prologue_cost
= 0;
930 for (i
= 0; i
< pwr
+ 1; i
++)
932 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
934 inside_cost
+= record_stmt_cost (cost_vec
, vect_pow2 (tmp
),
935 vec_promote_demote
, stmt_info
, 0,
939 /* FORNOW: Assuming maximum 2 args per stmts. */
940 for (i
= 0; i
< 2; i
++)
941 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
942 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
943 stmt_info
, 0, vect_prologue
);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE
, vect_location
,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
951 /* Function vect_model_store_cost
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
957 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
958 enum vect_def_type dt
,
959 vect_memory_access_type memory_access_type
,
960 vec_load_store_type vls_type
, slp_tree slp_node
,
961 stmt_vector_for_cost
*cost_vec
)
963 unsigned int inside_cost
= 0, prologue_cost
= 0;
964 stmt_vec_info first_stmt_info
= stmt_info
;
965 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
967 /* ??? Somehow we need to fix this at the callers. */
969 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
971 if (vls_type
== VLS_STORE_INVARIANT
)
974 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
977 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
978 stmt_info
, 0, vect_prologue
);
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node
&& grouped_access_p
)
984 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
996 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
998 /* Uses a high and low interleave or shuffle operations for each
1000 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1001 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1002 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1003 stmt_info
, 0, vect_body
);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE
, vect_location
,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1011 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1012 /* Costs of the stores. */
1013 if (memory_access_type
== VMAT_ELEMENTWISE
1014 || memory_access_type
== VMAT_GATHER_SCATTER
)
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1018 inside_cost
+= record_stmt_cost (cost_vec
,
1019 ncopies
* assumed_nunits
,
1020 scalar_store
, stmt_info
, 0, vect_body
);
1023 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1025 if (memory_access_type
== VMAT_ELEMENTWISE
1026 || memory_access_type
== VMAT_STRIDED_SLP
)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1030 inside_cost
+= record_stmt_cost (cost_vec
,
1031 ncopies
* assumed_nunits
,
1032 vec_to_scalar
, stmt_info
, 0, vect_body
);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE
, vect_location
,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1042 /* Calculate cost of DR's memory access. */
1044 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1045 unsigned int *inside_cost
,
1046 stmt_vector_for_cost
*body_cost_vec
)
1048 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info
, false);
1052 switch (alignment_support_scheme
)
1056 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1057 vector_store
, stmt_info
, 0,
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE
, vect_location
,
1062 "vect_model_store_cost: aligned.\n");
1066 case dr_unaligned_supported
:
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1070 unaligned_store
, stmt_info
,
1071 DR_MISALIGNMENT (dr_info
),
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE
, vect_location
,
1075 "vect_model_store_cost: unaligned supported by "
1080 case dr_unaligned_unsupported
:
1082 *inside_cost
= VECT_MAX_COST
;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1086 "vect_model_store_cost: unsupported access.\n");
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1104 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1105 vect_memory_access_type memory_access_type
,
1106 slp_instance instance
,
1108 stmt_vector_for_cost
*cost_vec
)
1110 unsigned int inside_cost
= 0, prologue_cost
= 0;
1111 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1113 gcc_assert (cost_vec
);
1115 /* ??? Somehow we need to fix this at the callers. */
1117 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1119 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1124 /* Record the cost for the permutation. */
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1128 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
1129 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1130 slp_vf
, instance
, true,
1132 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1133 first_stmt_info
, 0, vect_body
);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
1136 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1137 bitmap_clear (perm
);
1138 for (unsigned i
= 0;
1139 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1140 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1142 bool load_seen
= false;
1143 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1145 if (i
% assumed_nunits
== 0)
1151 if (bitmap_bit_p (perm
, i
))
1157 <= (DR_GROUP_SIZE (first_stmt_info
)
1158 - DR_GROUP_GAP (first_stmt_info
)
1159 + assumed_nunits
- 1) / assumed_nunits
);
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info
= stmt_info
;
1165 if (!slp_node
&& grouped_access_p
)
1166 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1178 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1180 /* Uses an even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1183 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1184 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1185 stmt_info
, 0, vect_body
);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE
, vect_location
,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1193 /* The loads themselves. */
1194 if (memory_access_type
== VMAT_ELEMENTWISE
1195 || memory_access_type
== VMAT_GATHER_SCATTER
)
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1199 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1200 inside_cost
+= record_stmt_cost (cost_vec
,
1201 ncopies
* assumed_nunits
,
1202 scalar_load
, stmt_info
, 0, vect_body
);
1205 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1206 &inside_cost
, &prologue_cost
,
1207 cost_vec
, cost_vec
, true);
1208 if (memory_access_type
== VMAT_ELEMENTWISE
1209 || memory_access_type
== VMAT_STRIDED_SLP
)
1210 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1211 stmt_info
, 0, vect_body
);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE
, vect_location
,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1220 /* Calculate cost of DR's memory access. */
1222 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1223 bool add_realign_cost
, unsigned int *inside_cost
,
1224 unsigned int *prologue_cost
,
1225 stmt_vector_for_cost
*prologue_cost_vec
,
1226 stmt_vector_for_cost
*body_cost_vec
,
1227 bool record_prologue_costs
)
1229 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info
, false);
1233 switch (alignment_support_scheme
)
1237 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1238 stmt_info
, 0, vect_body
);
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE
, vect_location
,
1242 "vect_model_load_cost: aligned.\n");
1246 case dr_unaligned_supported
:
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1250 unaligned_load
, stmt_info
,
1251 DR_MISALIGNMENT (dr_info
),
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE
, vect_location
,
1256 "vect_model_load_cost: unaligned supported by "
1261 case dr_explicit_realign
:
1263 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1264 vector_load
, stmt_info
, 0, vect_body
);
1265 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1266 vec_perm
, stmt_info
, 0, vect_body
);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1271 if (targetm
.vectorize
.builtin_mask_for_load
)
1272 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1273 stmt_info
, 0, vect_body
);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE
, vect_location
,
1277 "vect_model_load_cost: explicit realign\n");
1281 case dr_explicit_realign_optimized
:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE
, vect_location
,
1285 "vect_model_load_cost: unaligned software "
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost
&& record_prologue_costs
)
1297 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1298 vector_stmt
, stmt_info
,
1300 if (targetm
.vectorize
.builtin_mask_for_load
)
1301 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1302 vector_stmt
, stmt_info
,
1306 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1307 stmt_info
, 0, vect_body
);
1308 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1309 stmt_info
, 0, vect_body
);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE
, vect_location
,
1313 "vect_model_load_cost: explicit realign optimized"
1319 case dr_unaligned_unsupported
:
1321 *inside_cost
= VECT_MAX_COST
;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1325 "vect_model_load_cost: unsupported access.\n");
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1339 gimple_stmt_iterator
*gsi
)
1342 vect_finish_stmt_generation (stmt_vinfo
, new_stmt
, gsi
);
1345 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1349 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1353 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1356 pe
= loop_preheader_edge (loop
);
1357 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1358 gcc_assert (!new_bb
);
1362 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1364 gimple_stmt_iterator gsi_bb_start
;
1366 gcc_assert (bb_vinfo
);
1367 bb
= BB_VINFO_BB (bb_vinfo
);
1368 gsi_bb_start
= gsi_after_labels (bb
);
1369 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE
, vect_location
,
1375 "created new init_stmt: %G", new_stmt
);
1378 /* Function vect_init_vector.
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382 vector type a vector with all elements equal to VAL is created first.
1383 Place the initialization at BSI if it is not NULL. Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
1389 vect_init_vector (stmt_vec_info stmt_info
, tree val
, tree type
,
1390 gimple_stmt_iterator
*gsi
)
1395 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1396 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1398 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1399 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1401 /* Scalar boolean value should be transformed into
1402 all zeros or all ones value before building a vector. */
1403 if (VECTOR_BOOLEAN_TYPE_P (type
))
1405 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1406 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1408 if (CONSTANT_CLASS_P (val
))
1409 val
= integer_zerop (val
) ? false_val
: true_val
;
1412 new_temp
= make_ssa_name (TREE_TYPE (type
));
1413 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1414 val
, true_val
, false_val
);
1415 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1419 else if (CONSTANT_CLASS_P (val
))
1420 val
= fold_convert (TREE_TYPE (type
), val
);
1423 new_temp
= make_ssa_name (TREE_TYPE (type
));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1425 init_stmt
= gimple_build_assign (new_temp
,
1426 fold_build1 (VIEW_CONVERT_EXPR
,
1430 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1431 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1435 val
= build_vector_from_val (type
, val
);
1438 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1439 init_stmt
= gimple_build_assign (new_temp
, val
);
1440 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1444 /* Function vect_get_vec_def_for_operand_1.
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1451 enum vect_def_type dt
)
1454 stmt_vec_info vec_stmt_info
;
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def
:
1460 case vect_external_def
:
1461 /* Code should use vect_get_vec_def_for_operand. */
1464 /* Operand is defined by a loop header phi. In case of nested
1465 cycles we also may have uses of the backedge def. */
1466 case vect_reduction_def
:
1467 case vect_double_reduction_def
:
1468 case vect_nested_cycle
:
1469 case vect_induction_def
:
1470 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1471 || dt
== vect_nested_cycle
);
1474 /* operand is defined inside the loop. */
1475 case vect_internal_def
:
1477 /* Get the def from the vectorized stmt. */
1478 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1479 /* Get vectorized pattern statement. */
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1483 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1484 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1485 gcc_assert (vec_stmt_info
);
1486 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1487 vec_oprnd
= PHI_RESULT (phi
);
1489 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1499 /* Function vect_get_vec_def_for_operand.
1501 OP is an operand in STMT_VINFO. This function returns a (vector) def
1502 that will be used in the vectorized stmt for STMT_VINFO.
1504 In the case that OP is an SSA_NAME which is defined in the loop, then
1505 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1507 In case OP is an invariant or constant, a new stmt that creates a vector def
1508 needs to be introduced. VECTYPE may be used to specify a required type for
1509 vector invariant. */
1512 vect_get_vec_def_for_operand (tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1515 enum vect_def_type dt
;
1517 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE
, vect_location
,
1521 "vect_get_vec_def_for_operand: %T\n", op
);
1523 stmt_vec_info def_stmt_info
;
1524 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1525 &def_stmt_info
, &def_stmt
);
1526 gcc_assert (is_simple_use
);
1527 if (def_stmt
&& dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1530 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1532 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1536 vector_type
= vectype
;
1537 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1538 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1539 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1541 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1543 gcc_assert (vector_type
);
1544 return vect_init_vector (stmt_vinfo
, op
, vector_type
, NULL
);
1547 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1551 /* Function vect_get_vec_def_for_stmt_copy
1553 Return a vector-def for an operand. This function is used when the
1554 vectorized stmt to be created (by the caller to this function) is a "copy"
1555 created in case the vectorized result cannot fit in one vector, and several
1556 copies of the vector-stmt are required. In this case the vector-def is
1557 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1558 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1561 In case the vectorization factor (VF) is bigger than the number
1562 of elements that can fit in a vectype (nunits), we have to generate
1563 more than one vector stmt to vectorize the scalar stmt. This situation
1564 arises when there are multiple data-types operated upon in the loop; the
1565 smallest data-type determines the VF, and as a result, when vectorizing
1566 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1567 vector stmt (each computing a vector of 'nunits' results, and together
1568 computing 'VF' results in each iteration). This function is called when
1569 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1570 which VF=16 and nunits=4, so the number of copies required is 4):
1572 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1574 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1575 VS1.1: vx.1 = memref1 VS1.2
1576 VS1.2: vx.2 = memref2 VS1.3
1577 VS1.3: vx.3 = memref3
1579 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1580 VSnew.1: vz1 = vx.1 + ... VSnew.2
1581 VSnew.2: vz2 = vx.2 + ... VSnew.3
1582 VSnew.3: vz3 = vx.3 + ...
1584 The vectorization of S1 is explained in vectorizable_load.
1585 The vectorization of S2:
1586 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1587 the function 'vect_get_vec_def_for_operand' is called to
1588 get the relevant vector-def for each operand of S2. For operand x it
1589 returns the vector-def 'vx.0'.
1591 To create the remaining copies of the vector-stmt (VSnew.j), this
1592 function is called to get the relevant vector-def for each operand. It is
1593 obtained from the respective VS1.j stmt, which is recorded in the
1594 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1596 For example, to obtain the vector-def 'vx.1' in order to create the
1597 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1598 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1599 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1600 and return its def ('vx.1').
1601 Overall, to create the above sequence this function will be called 3 times:
1602 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1603 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1604 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1607 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1609 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1611 /* Do nothing; can reuse same def. */
1614 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1615 gcc_assert (def_stmt_info
);
1616 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1617 vec_oprnd
= PHI_RESULT (phi
);
1619 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1624 /* Get vectorized definitions for the operands to create a copy of an original
1625 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1628 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1629 vec
<tree
> *vec_oprnds0
,
1630 vec
<tree
> *vec_oprnds1
)
1632 tree vec_oprnd
= vec_oprnds0
->pop ();
1634 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1635 vec_oprnds0
->quick_push (vec_oprnd
);
1637 if (vec_oprnds1
&& vec_oprnds1
->length ())
1639 vec_oprnd
= vec_oprnds1
->pop ();
1640 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1641 vec_oprnds1
->quick_push (vec_oprnd
);
1646 /* Get vectorized definitions for OP0 and OP1. */
1649 vect_get_vec_defs (tree op0
, tree op1
, stmt_vec_info stmt_info
,
1650 vec
<tree
> *vec_oprnds0
,
1651 vec
<tree
> *vec_oprnds1
,
1656 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1657 auto_vec
<tree
> ops (nops
);
1658 auto_vec
<vec
<tree
> > vec_defs (nops
);
1660 ops
.quick_push (op0
);
1662 ops
.quick_push (op1
);
1664 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1666 *vec_oprnds0
= vec_defs
[0];
1668 *vec_oprnds1
= vec_defs
[1];
1674 vec_oprnds0
->create (1);
1675 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt_info
);
1676 vec_oprnds0
->quick_push (vec_oprnd
);
1680 vec_oprnds1
->create (1);
1681 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt_info
);
1682 vec_oprnds1
->quick_push (vec_oprnd
);
1687 /* Helper function called by vect_finish_replace_stmt and
1688 vect_finish_stmt_generation. Set the location of the new
1689 statement and create and return a stmt_vec_info for it. */
1691 static stmt_vec_info
1692 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1694 vec_info
*vinfo
= stmt_info
->vinfo
;
1696 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1701 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1703 /* While EH edges will generally prevent vectorization, stmt might
1704 e.g. be in a must-not-throw region. Ensure newly created stmts
1705 that could throw are part of the same region. */
1706 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1707 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1708 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1710 return vec_stmt_info
;
1713 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1714 which sets the same scalar result as STMT_INFO did. Create and return a
1715 stmt_vec_info for VEC_STMT. */
1718 vect_finish_replace_stmt (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1720 gcc_assert (gimple_get_lhs (stmt_info
->stmt
) == gimple_get_lhs (vec_stmt
));
1722 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt_info
->stmt
);
1723 gsi_replace (&gsi
, vec_stmt
, false);
1725 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1728 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1729 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1732 vect_finish_stmt_generation (stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1733 gimple_stmt_iterator
*gsi
)
1735 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1737 if (!gsi_end_p (*gsi
)
1738 && gimple_has_mem_ops (vec_stmt
))
1740 gimple
*at_stmt
= gsi_stmt (*gsi
);
1741 tree vuse
= gimple_vuse (at_stmt
);
1742 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1744 tree vdef
= gimple_vdef (at_stmt
);
1745 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1746 /* If we have an SSA vuse and insert a store, update virtual
1747 SSA form to avoid triggering the renamer. Do so only
1748 if we can easily see all uses - which is what almost always
1749 happens with the way vectorized stmts are inserted. */
1750 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1751 && ((is_gimple_assign (vec_stmt
)
1752 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1753 || (is_gimple_call (vec_stmt
)
1754 && !(gimple_call_flags (vec_stmt
)
1755 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1757 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1758 gimple_set_vdef (vec_stmt
, new_vdef
);
1759 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1763 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1764 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1767 /* We want to vectorize a call to combined function CFN with function
1768 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1769 as the types of all inputs. Check whether this is possible using
1770 an internal function, returning its code if so or IFN_LAST if not. */
1773 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1774 tree vectype_out
, tree vectype_in
)
1777 if (internal_fn_p (cfn
))
1778 ifn
= as_internal_fn (cfn
);
1780 ifn
= associated_internal_fn (fndecl
);
1781 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1783 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1784 if (info
.vectorizable
)
1786 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1787 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1788 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1789 OPTIMIZE_FOR_SPEED
))
1797 static tree
permute_vec_elements (tree
, tree
, tree
, stmt_vec_info
,
1798 gimple_stmt_iterator
*);
1800 /* Check whether a load or store statement in the loop described by
1801 LOOP_VINFO is possible in a fully-masked loop. This is testing
1802 whether the vectorizer pass has the appropriate support, as well as
1803 whether the target does.
1805 VLS_TYPE says whether the statement is a load or store and VECTYPE
1806 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1807 says how the load or store is going to be implemented and GROUP_SIZE
1808 is the number of load or store statements in the containing group.
1809 If the access is a gather load or scatter store, GS_INFO describes
1812 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1813 supported, otherwise record the required mask types. */
1816 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1817 vec_load_store_type vls_type
, int group_size
,
1818 vect_memory_access_type memory_access_type
,
1819 gather_scatter_info
*gs_info
)
1821 /* Invariant loads need no special support. */
1822 if (memory_access_type
== VMAT_INVARIANT
)
1825 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1826 machine_mode vecmode
= TYPE_MODE (vectype
);
1827 bool is_load
= (vls_type
== VLS_LOAD
);
1828 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1831 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1832 : !vect_store_lanes_supported (vectype
, group_size
, true))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1836 "can't use a fully-masked loop because the"
1837 " target doesn't have an appropriate masked"
1838 " load/store-lanes instruction.\n");
1839 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1842 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1843 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1847 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1849 internal_fn ifn
= (is_load
1850 ? IFN_MASK_GATHER_LOAD
1851 : IFN_MASK_SCATTER_STORE
);
1852 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1853 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1854 gs_info
->memory_type
,
1855 TYPE_SIGN (offset_type
),
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1860 "can't use a fully-masked loop because the"
1861 " target doesn't have an appropriate masked"
1862 " gather load or scatter store instruction.\n");
1863 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1866 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1867 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1871 if (memory_access_type
!= VMAT_CONTIGUOUS
1872 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1874 /* Element X of the data must come from iteration i * VF + X of the
1875 scalar loop. We need more work to support other mappings. */
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1878 "can't use a fully-masked loop because an access"
1879 " isn't contiguous.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1884 machine_mode mask_mode
;
1885 if (!(targetm
.vectorize
.get_mask_mode
1886 (GET_MODE_NUNITS (vecmode
),
1887 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1888 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1892 "can't use a fully-masked loop because the target"
1893 " doesn't have the appropriate masked load or"
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1898 /* We might load more scalars than we need for permuting SLP loads.
1899 We checked in get_group_load_store_type that the extra elements
1900 don't leak into a new vector. */
1901 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1902 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1903 unsigned int nvectors
;
1904 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1905 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
);
1910 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1911 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1912 that needs to be applied to all loads and stores in a vectorized loop.
1913 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1915 MASK_TYPE is the type of both masks. If new statements are needed,
1916 insert them before GSI. */
1919 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1920 gimple_stmt_iterator
*gsi
)
1922 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1926 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1927 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1928 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1929 vec_mask
, loop_mask
);
1930 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1934 /* Determine whether we can use a gather load or scatter store to vectorize
1935 strided load or store STMT_INFO by truncating the current offset to a
1936 smaller width. We need to be able to construct an offset vector:
1938 { 0, X, X*2, X*3, ... }
1940 without loss of precision, where X is STMT_INFO's DR_STEP.
1942 Return true if this is possible, describing the gather load or scatter
1943 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1946 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1947 loop_vec_info loop_vinfo
, bool masked_p
,
1948 gather_scatter_info
*gs_info
)
1950 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1951 data_reference
*dr
= dr_info
->dr
;
1952 tree step
= DR_STEP (dr
);
1953 if (TREE_CODE (step
) != INTEGER_CST
)
1955 /* ??? Perhaps we could use range information here? */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE
, vect_location
,
1958 "cannot truncate variable step.\n");
1962 /* Get the number of bits in an element. */
1963 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1964 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1965 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1967 /* Set COUNT to the upper limit on the number of elements - 1.
1968 Start with the maximum vectorization factor. */
1969 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1971 /* Try lowering COUNT to the number of scalar latch iterations. */
1972 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1973 widest_int max_iters
;
1974 if (max_loop_iterations (loop
, &max_iters
)
1975 && max_iters
< count
)
1976 count
= max_iters
.to_shwi ();
1978 /* Try scales of 1 and the element size. */
1979 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1980 wi::overflow_type overflow
= wi::OVF_NONE
;
1981 for (int i
= 0; i
< 2; ++i
)
1983 int scale
= scales
[i
];
1985 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1988 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1989 in OFFSET_BITS bits. */
1990 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1993 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1994 if (wi::min_precision (range
, sign
) > element_bits
)
1996 overflow
= wi::OVF_UNKNOWN
;
2000 /* See whether the target supports the operation. */
2001 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2002 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr
), masked_p
, vectype
,
2003 memory_type
, element_bits
, sign
, scale
,
2004 &gs_info
->ifn
, &gs_info
->element_type
))
2007 tree offset_type
= build_nonstandard_integer_type (element_bits
,
2010 gs_info
->decl
= NULL_TREE
;
2011 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2012 but we don't need to store that here. */
2013 gs_info
->base
= NULL_TREE
;
2014 gs_info
->offset
= fold_convert (offset_type
, step
);
2015 gs_info
->offset_dt
= vect_constant_def
;
2016 gs_info
->offset_vectype
= NULL_TREE
;
2017 gs_info
->scale
= scale
;
2018 gs_info
->memory_type
= memory_type
;
2022 if (overflow
&& dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE
, vect_location
,
2024 "truncating gather/scatter offset to %d bits"
2025 " might change its value.\n", element_bits
);
2030 /* Return true if we can use gather/scatter internal functions to
2031 vectorize STMT_INFO, which is a grouped or strided load or store.
2032 MASKED_P is true if load or store is conditional. When returning
2033 true, fill in GS_INFO with the information required to perform the
2037 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2038 loop_vec_info loop_vinfo
, bool masked_p
,
2039 gather_scatter_info
*gs_info
)
2041 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2043 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2046 scalar_mode element_mode
= SCALAR_TYPE_MODE (gs_info
->element_type
);
2047 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2048 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2049 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
2051 /* Enforced by vect_check_gather_scatter. */
2052 gcc_assert (element_bits
>= offset_bits
);
2054 /* If the elements are wider than the offset, convert the offset to the
2055 same width, without changing its sign. */
2056 if (element_bits
> offset_bits
)
2058 bool unsigned_p
= TYPE_UNSIGNED (offset_type
);
2059 offset_type
= build_nonstandard_integer_type (element_bits
, unsigned_p
);
2060 gs_info
->offset
= fold_convert (offset_type
, gs_info
->offset
);
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE
, vect_location
,
2065 "using gather/scatter for strided/grouped access,"
2066 " scale = %d\n", gs_info
->scale
);
2071 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2072 elements with a known constant step. Return -1 if that step
2073 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2076 compare_step_with_zero (stmt_vec_info stmt_info
)
2078 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2079 return tree_int_cst_compare (vect_dr_behavior (dr_info
)->step
,
2083 /* If the target supports a permute mask that reverses the elements in
2084 a vector of type VECTYPE, return that mask, otherwise return null. */
2087 perm_mask_for_reverse (tree vectype
)
2089 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2091 /* The encoding has a single stepped pattern. */
2092 vec_perm_builder
sel (nunits
, 1, 3);
2093 for (int i
= 0; i
< 3; ++i
)
2094 sel
.quick_push (nunits
- 1 - i
);
2096 vec_perm_indices
indices (sel
, 1, nunits
);
2097 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2099 return vect_gen_perm_mask_checked (vectype
, indices
);
2102 /* STMT_INFO is either a masked or unconditional store. Return the value
2106 vect_get_store_rhs (stmt_vec_info stmt_info
)
2108 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2110 gcc_assert (gimple_assign_single_p (assign
));
2111 return gimple_assign_rhs1 (assign
);
2113 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2115 internal_fn ifn
= gimple_call_internal_fn (call
);
2116 int index
= internal_fn_stored_value_index (ifn
);
2117 gcc_assert (index
>= 0);
2118 return gimple_call_arg (call
, index
);
2123 /* A subroutine of get_load_store_type, with a subset of the same
2124 arguments. Handle the case where STMT_INFO is part of a grouped load
2127 For stores, the statements in the group are all consecutive
2128 and there is no gap at the end. For loads, the statements in the
2129 group might not be consecutive; there can be gaps between statements
2130 as well as at the end. */
2133 get_group_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2134 bool masked_p
, vec_load_store_type vls_type
,
2135 vect_memory_access_type
*memory_access_type
,
2136 gather_scatter_info
*gs_info
)
2138 vec_info
*vinfo
= stmt_info
->vinfo
;
2139 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2140 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2141 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2142 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2143 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2144 bool single_element_p
= (stmt_info
== first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2146 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2147 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p
= false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p
= (!masked_p
2156 && vls_type
== VLS_LOAD
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits
, group_size
))
2175 *memory_access_type
= VMAT_STRIDED_SLP
;
2177 *memory_access_type
= VMAT_ELEMENTWISE
;
2181 overrun_p
= loop_vinfo
&& gap
!= 0;
2182 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
2194 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2195 / vect_get_scalar_dr_size (first_dr_info
)))
2197 if (overrun_p
&& !can_overrun_p
)
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2201 "Peeling for outer loop is not supported\n");
2204 *memory_access_type
= VMAT_CONTIGUOUS
;
2209 /* We can always handle this case using elementwise accesses,
2210 but see if something more efficient is available. */
2211 *memory_access_type
= VMAT_ELEMENTWISE
;
2213 /* If there is a gap at the end of the group then these optimizations
2214 would access excess elements in the last iteration. */
2215 bool would_overrun_p
= (gap
!= 0);
2216 /* An overrun is fine if the trailing elements are smaller than the
2217 alignment boundary B. Every vector access will be a multiple of B
2218 and so we are guaranteed to access a non-gap element in the
2219 same B-sized block. */
2222 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2223 / vect_get_scalar_dr_size (first_dr_info
)))
2224 would_overrun_p
= false;
2226 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2227 && (can_overrun_p
|| !would_overrun_p
)
2228 && compare_step_with_zero (stmt_info
) > 0)
2230 /* First cope with the degenerate case of a single-element
2232 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2233 *memory_access_type
= VMAT_CONTIGUOUS
;
2235 /* Otherwise try using LOAD/STORE_LANES. */
2236 if (*memory_access_type
== VMAT_ELEMENTWISE
2237 && (vls_type
== VLS_LOAD
2238 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2239 : vect_store_lanes_supported (vectype
, group_size
,
2242 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2243 overrun_p
= would_overrun_p
;
2246 /* If that fails, try using permuting loads. */
2247 if (*memory_access_type
== VMAT_ELEMENTWISE
2248 && (vls_type
== VLS_LOAD
2249 ? vect_grouped_load_supported (vectype
, single_element_p
,
2251 : vect_grouped_store_supported (vectype
, group_size
)))
2253 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2254 overrun_p
= would_overrun_p
;
2258 /* As a last resort, trying using a gather load or scatter store.
2260 ??? Although the code can handle all group sizes correctly,
2261 it probably isn't a win to use separate strided accesses based
2262 on nearby locations. Or, even if it's a win over scalar code,
2263 it might not be a win over vectorizing at a lower VF, if that
2264 allows us to use contiguous accesses. */
2265 if (*memory_access_type
== VMAT_ELEMENTWISE
2268 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2270 *memory_access_type
= VMAT_GATHER_SCATTER
;
2273 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2275 /* STMT is the leader of the group. Check the operands of all the
2276 stmts of the group. */
2277 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2278 while (next_stmt_info
)
2280 tree op
= vect_get_store_rhs (next_stmt_info
);
2281 enum vect_def_type dt
;
2282 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2286 "use not simple.\n");
2289 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2295 gcc_assert (can_overrun_p
);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2298 "Data access with gaps requires scalar "
2300 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2306 /* A subroutine of get_load_store_type, with a subset of the same
2307 arguments. Handle the case where STMT_INFO is a load or store that
2308 accesses consecutive elements with a negative step. */
2310 static vect_memory_access_type
2311 get_negative_load_store_type (stmt_vec_info stmt_info
, tree vectype
,
2312 vec_load_store_type vls_type
,
2313 unsigned int ncopies
)
2315 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2316 dr_alignment_support alignment_support_scheme
;
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2322 "multiple types with negative step.\n");
2323 return VMAT_ELEMENTWISE
;
2326 alignment_support_scheme
= vect_supportable_dr_alignment (dr_info
, false);
2327 if (alignment_support_scheme
!= dr_aligned
2328 && alignment_support_scheme
!= dr_unaligned_supported
)
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2332 "negative step but alignment required.\n");
2333 return VMAT_ELEMENTWISE
;
2336 if (vls_type
== VLS_STORE_INVARIANT
)
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE
, vect_location
,
2340 "negative step with invariant source;"
2341 " no permute needed.\n");
2342 return VMAT_CONTIGUOUS_DOWN
;
2345 if (!perm_mask_for_reverse (vectype
))
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2349 "negative step and reversing not supported.\n");
2350 return VMAT_ELEMENTWISE
;
2353 return VMAT_CONTIGUOUS_REVERSE
;
2356 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2357 if there is a memory access type that the vectorized form can use,
2358 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2359 or scatters, fill in GS_INFO accordingly.
2361 SLP says whether we're performing SLP rather than loop vectorization.
2362 MASKED_P is true if the statement is conditional on a vectorized mask.
2363 VECTYPE is the vector type that the vectorized statements will use.
2364 NCOPIES is the number of vector statements that will be needed. */
2367 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2368 bool masked_p
, vec_load_store_type vls_type
,
2369 unsigned int ncopies
,
2370 vect_memory_access_type
*memory_access_type
,
2371 gather_scatter_info
*gs_info
)
2373 vec_info
*vinfo
= stmt_info
->vinfo
;
2374 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2375 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2378 *memory_access_type
= VMAT_GATHER_SCATTER
;
2379 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2381 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2382 &gs_info
->offset_dt
,
2383 &gs_info
->offset_vectype
))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "%s index use not simple.\n",
2388 vls_type
== VLS_LOAD
? "gather" : "scatter");
2392 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2394 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2395 vls_type
, memory_access_type
, gs_info
))
2398 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2402 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2404 *memory_access_type
= VMAT_GATHER_SCATTER
;
2406 *memory_access_type
= VMAT_ELEMENTWISE
;
2410 int cmp
= compare_step_with_zero (stmt_info
);
2412 *memory_access_type
= get_negative_load_store_type
2413 (stmt_info
, vectype
, vls_type
, ncopies
);
2416 gcc_assert (vls_type
== VLS_LOAD
);
2417 *memory_access_type
= VMAT_INVARIANT
;
2420 *memory_access_type
= VMAT_CONTIGUOUS
;
2423 if ((*memory_access_type
== VMAT_ELEMENTWISE
2424 || *memory_access_type
== VMAT_STRIDED_SLP
)
2425 && !nunits
.is_constant ())
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2429 "Not using elementwise accesses due to variable "
2430 "vectorization factor.\n");
2434 /* FIXME: At the moment the cost model seems to underestimate the
2435 cost of using elementwise accesses. This check preserves the
2436 traditional behavior until that can be fixed. */
2437 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2438 if (!first_stmt_info
)
2439 first_stmt_info
= stmt_info
;
2440 if (*memory_access_type
== VMAT_ELEMENTWISE
2441 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2442 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2443 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2444 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2448 "not falling back to elementwise accesses\n");
2454 /* Return true if boolean argument MASK is suitable for vectorizing
2455 conditional load or store STMT_INFO. When returning true, store the type
2456 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2457 in *MASK_VECTYPE_OUT. */
2460 vect_check_load_store_mask (stmt_vec_info stmt_info
, tree mask
,
2461 vect_def_type
*mask_dt_out
,
2462 tree
*mask_vectype_out
)
2464 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2468 "mask argument is not a boolean.\n");
2472 if (TREE_CODE (mask
) != SSA_NAME
)
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2476 "mask argument is not an SSA name.\n");
2480 enum vect_def_type mask_dt
;
2482 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2486 "mask use not simple.\n");
2490 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2492 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2494 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2498 "could not find an appropriate vector mask type.\n");
2502 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2503 TYPE_VECTOR_SUBPARTS (vectype
)))
2505 if (dump_enabled_p ())
2506 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2507 "vector mask type %T",
2508 " does not match vector data type %T.\n",
2509 mask_vectype
, vectype
);
2514 *mask_dt_out
= mask_dt
;
2515 *mask_vectype_out
= mask_vectype
;
2519 /* Return true if stored value RHS is suitable for vectorizing store
2520 statement STMT_INFO. When returning true, store the type of the
2521 definition in *RHS_DT_OUT, the type of the vectorized store value in
2522 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2525 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2526 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2527 vec_load_store_type
*vls_type_out
)
2529 /* In the case this is a store from a constant make sure
2530 native_encode_expr can handle it. */
2531 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2535 "cannot encode constant as a byte sequence.\n");
2539 enum vect_def_type rhs_dt
;
2541 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2545 "use not simple.\n");
2549 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2550 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2554 "incompatible vector types.\n");
2558 *rhs_dt_out
= rhs_dt
;
2559 *rhs_vectype_out
= rhs_vectype
;
2560 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2561 *vls_type_out
= VLS_STORE_INVARIANT
;
2563 *vls_type_out
= VLS_STORE
;
2567 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2568 Note that we support masks with floating-point type, in which case the
2569 floats are interpreted as a bitmask. */
2572 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
2574 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2575 return build_int_cst (masktype
, -1);
2576 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2578 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2579 mask
= build_vector_from_val (masktype
, mask
);
2580 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2582 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2586 for (int j
= 0; j
< 6; ++j
)
2588 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2589 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2590 mask
= build_vector_from_val (masktype
, mask
);
2591 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2596 /* Build an all-zero merge value of type VECTYPE while vectorizing
2597 STMT_INFO as a gather load. */
2600 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
2603 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2604 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2605 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2609 for (int j
= 0; j
< 6; ++j
)
2611 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2612 merge
= build_real (TREE_TYPE (vectype
), r
);
2616 merge
= build_vector_from_val (vectype
, merge
);
2617 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
2620 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2621 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2622 the gather load operation. If the load is conditional, MASK is the
2623 unvectorized condition and MASK_DT is its definition type, otherwise
2627 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2628 gimple_stmt_iterator
*gsi
,
2629 stmt_vec_info
*vec_stmt
,
2630 gather_scatter_info
*gs_info
,
2633 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2634 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2635 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2636 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2637 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2638 edge pe
= loop_preheader_edge (loop
);
2639 enum { NARROW
, NONE
, WIDEN
} modifier
;
2640 poly_uint64 gather_off_nunits
2641 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2643 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2644 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2645 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2646 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2647 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2648 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2649 tree scaletype
= TREE_VALUE (arglist
);
2650 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2651 && (!mask
|| types_compatible_p (srctype
, masktype
)));
2653 tree perm_mask
= NULL_TREE
;
2654 tree mask_perm_mask
= NULL_TREE
;
2655 if (known_eq (nunits
, gather_off_nunits
))
2657 else if (known_eq (nunits
* 2, gather_off_nunits
))
2661 /* Currently widening gathers and scatters are only supported for
2662 fixed-length vectors. */
2663 int count
= gather_off_nunits
.to_constant ();
2664 vec_perm_builder
sel (count
, count
, 1);
2665 for (int i
= 0; i
< count
; ++i
)
2666 sel
.quick_push (i
| (count
/ 2));
2668 vec_perm_indices
indices (sel
, 1, count
);
2669 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2672 else if (known_eq (nunits
, gather_off_nunits
* 2))
2676 /* Currently narrowing gathers and scatters are only supported for
2677 fixed-length vectors. */
2678 int count
= nunits
.to_constant ();
2679 vec_perm_builder
sel (count
, count
, 1);
2680 sel
.quick_grow (count
);
2681 for (int i
= 0; i
< count
; ++i
)
2682 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2683 vec_perm_indices
indices (sel
, 2, count
);
2684 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2690 for (int i
= 0; i
< count
; ++i
)
2691 sel
[i
] = i
| (count
/ 2);
2692 indices
.new_vector (sel
, 2, count
);
2693 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2699 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2700 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2702 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2703 if (!is_gimple_min_invariant (ptr
))
2706 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2707 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2708 gcc_assert (!new_bb
);
2711 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2713 tree vec_oprnd0
= NULL_TREE
;
2714 tree vec_mask
= NULL_TREE
;
2715 tree src_op
= NULL_TREE
;
2716 tree mask_op
= NULL_TREE
;
2717 tree prev_res
= NULL_TREE
;
2718 stmt_vec_info prev_stmt_info
= NULL
;
2722 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2723 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
2726 for (int j
= 0; j
< ncopies
; ++j
)
2729 if (modifier
== WIDEN
&& (j
& 1))
2730 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2731 perm_mask
, stmt_info
, gsi
);
2734 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2736 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2739 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2741 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2742 TYPE_VECTOR_SUBPARTS (idxtype
)));
2743 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2744 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2745 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2746 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2752 if (mask_perm_mask
&& (j
& 1))
2753 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2754 mask_perm_mask
, stmt_info
, gsi
);
2758 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2760 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2764 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2767 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
)),
2768 TYPE_VECTOR_SUBPARTS (masktype
)));
2769 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2770 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2772 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2773 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2780 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2783 stmt_vec_info new_stmt_info
;
2784 if (!useless_type_conversion_p (vectype
, rettype
))
2786 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2787 TYPE_VECTOR_SUBPARTS (rettype
)));
2788 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2789 gimple_call_set_lhs (new_call
, op
);
2790 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2791 var
= make_ssa_name (vec_dest
);
2792 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2793 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2795 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2799 var
= make_ssa_name (vec_dest
, new_call
);
2800 gimple_call_set_lhs (new_call
, var
);
2802 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2805 if (modifier
== NARROW
)
2812 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2814 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2817 if (prev_stmt_info
== NULL
)
2818 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2820 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2821 prev_stmt_info
= new_stmt_info
;
2825 /* Prepare the base and offset in GS_INFO for vectorization.
2826 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2827 to the vectorized offset argument for the first copy of STMT_INFO.
2828 STMT_INFO is the statement described by GS_INFO and LOOP is the
2832 vect_get_gather_scatter_ops (struct loop
*loop
, stmt_vec_info stmt_info
,
2833 gather_scatter_info
*gs_info
,
2834 tree
*dataref_ptr
, tree
*vec_offset
)
2836 gimple_seq stmts
= NULL
;
2837 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2841 edge pe
= loop_preheader_edge (loop
);
2842 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2843 gcc_assert (!new_bb
);
2845 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2846 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2847 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2851 /* Prepare to implement a grouped or strided load or store using
2852 the gather load or scatter store operation described by GS_INFO.
2853 STMT_INFO is the load or store statement.
2855 Set *DATAREF_BUMP to the amount that should be added to the base
2856 address after each copy of the vectorized statement. Set *VEC_OFFSET
2857 to an invariant offset vector in which element I has the value
2858 I * DR_STEP / SCALE. */
2861 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2862 loop_vec_info loop_vinfo
,
2863 gather_scatter_info
*gs_info
,
2864 tree
*dataref_bump
, tree
*vec_offset
)
2866 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2867 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2868 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2871 tree bump
= size_binop (MULT_EXPR
,
2872 fold_convert (sizetype
, DR_STEP (dr
)),
2873 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2874 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2876 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2878 /* The offset given in GS_INFO can have pointer type, so use the element
2879 type of the vector instead. */
2880 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2881 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2882 offset_type
= TREE_TYPE (offset_vectype
);
2884 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2885 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
2886 ssize_int (gs_info
->scale
));
2887 step
= fold_convert (offset_type
, step
);
2888 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2890 /* Create {0, X, X*2, X*3, ...}. */
2891 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
2892 build_zero_cst (offset_type
), step
);
2894 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2897 /* Return the amount that should be added to a vector pointer to move
2898 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2899 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2903 vect_get_data_ptr_increment (dr_vec_info
*dr_info
, tree aggr_type
,
2904 vect_memory_access_type memory_access_type
)
2906 if (memory_access_type
== VMAT_INVARIANT
)
2907 return size_zero_node
;
2909 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2910 tree step
= vect_dr_behavior (dr_info
)->step
;
2911 if (tree_int_cst_sgn (step
) == -1)
2912 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2916 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2919 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2920 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
2921 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2924 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2925 vec_info
*vinfo
= stmt_info
->vinfo
;
2926 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2929 op
= gimple_call_arg (stmt
, 0);
2930 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2931 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2933 /* Multiple types in SLP are handled by creating the appropriate number of
2934 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2939 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2941 gcc_assert (ncopies
>= 1);
2943 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2947 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2948 unsigned word_bytes
;
2949 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
2952 /* The encoding uses one stepped pattern for each byte in the word. */
2953 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2954 for (unsigned i
= 0; i
< 3; ++i
)
2955 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2956 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2958 vec_perm_indices
indices (elts
, 1, num_bytes
);
2959 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2964 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2965 DUMP_VECT_SCOPE ("vectorizable_bswap");
2968 record_stmt_cost (cost_vec
,
2969 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2970 record_stmt_cost (cost_vec
,
2971 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2976 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2979 vec
<tree
> vec_oprnds
= vNULL
;
2980 stmt_vec_info new_stmt_info
= NULL
;
2981 stmt_vec_info prev_stmt_info
= NULL
;
2982 for (unsigned j
= 0; j
< ncopies
; j
++)
2986 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
2988 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
2990 /* Arguments are ready. create the new vector stmt. */
2993 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2996 tree tem
= make_ssa_name (char_vectype
);
2997 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2998 char_vectype
, vop
));
2999 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3000 tree tem2
= make_ssa_name (char_vectype
);
3001 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3002 tem
, tem
, bswap_vconst
);
3003 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3004 tem
= make_ssa_name (vectype
);
3005 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3008 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3010 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3017 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3019 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3021 prev_stmt_info
= new_stmt_info
;
3024 vec_oprnds
.release ();
3028 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3029 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3030 in a single step. On success, store the binary pack code in
3034 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3035 tree_code
*convert_code
)
3037 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3038 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3042 int multi_step_cvt
= 0;
3043 auto_vec
<tree
, 8> interm_types
;
3044 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3045 &code
, &multi_step_cvt
,
3050 *convert_code
= code
;
3054 /* Function vectorizable_call.
3056 Check if STMT_INFO performs a function call that can be vectorized.
3057 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3058 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3059 Return true if STMT_INFO is vectorizable in this way. */
3062 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3063 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3064 stmt_vector_for_cost
*cost_vec
)
3070 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3071 stmt_vec_info prev_stmt_info
;
3072 tree vectype_out
, vectype_in
;
3073 poly_uint64 nunits_in
;
3074 poly_uint64 nunits_out
;
3075 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3076 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3077 vec_info
*vinfo
= stmt_info
->vinfo
;
3078 tree fndecl
, new_temp
, rhs_type
;
3079 enum vect_def_type dt
[4]
3080 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3081 vect_unknown_def_type
};
3082 int ndts
= ARRAY_SIZE (dt
);
3084 auto_vec
<tree
, 8> vargs
;
3085 auto_vec
<tree
, 8> orig_vargs
;
3086 enum { NARROW
, NONE
, WIDEN
} modifier
;
3090 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3093 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3097 /* Is STMT_INFO a vectorizable call? */
3098 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3102 if (gimple_call_internal_p (stmt
)
3103 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3104 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3105 /* Handled by vectorizable_load and vectorizable_store. */
3108 if (gimple_call_lhs (stmt
) == NULL_TREE
3109 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3112 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3114 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3116 /* Process function arguments. */
3117 rhs_type
= NULL_TREE
;
3118 vectype_in
= NULL_TREE
;
3119 nargs
= gimple_call_num_args (stmt
);
3121 /* Bail out if the function has more than three arguments, we do not have
3122 interesting builtin functions to vectorize with more than two arguments
3123 except for fma. No arguments is also not good. */
3124 if (nargs
== 0 || nargs
> 4)
3127 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3128 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3129 if (cfn
== CFN_GOMP_SIMD_LANE
)
3132 rhs_type
= unsigned_type_node
;
3136 if (internal_fn_p (cfn
))
3137 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3139 for (i
= 0; i
< nargs
; i
++)
3143 op
= gimple_call_arg (stmt
, i
);
3144 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &opvectype
))
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3148 "use not simple.\n");
3152 /* Skip the mask argument to an internal function. This operand
3153 has been converted via a pattern if necessary. */
3154 if ((int) i
== mask_opno
)
3157 /* We can only handle calls with arguments of the same type. */
3159 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3163 "argument types differ.\n");
3167 rhs_type
= TREE_TYPE (op
);
3170 vectype_in
= opvectype
;
3172 && opvectype
!= vectype_in
)
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3176 "argument vector types differ.\n");
3180 /* If all arguments are external or constant defs use a vector type with
3181 the same size as the output vector type. */
3183 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3185 gcc_assert (vectype_in
);
3188 if (dump_enabled_p ())
3189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3190 "no vectype for scalar type %T\n", rhs_type
);
3196 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3197 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3198 if (known_eq (nunits_in
* 2, nunits_out
))
3200 else if (known_eq (nunits_out
, nunits_in
))
3202 else if (known_eq (nunits_out
* 2, nunits_in
))
3207 /* We only handle functions that do not read or clobber memory. */
3208 if (gimple_vuse (stmt
))
3210 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3212 "function reads from or writes to memory.\n");
3216 /* For now, we only vectorize functions if a target specific builtin
3217 is available. TODO -- in some cases, it might be profitable to
3218 insert the calls for pieces of the vector, in order to be able
3219 to vectorize other operations in the loop. */
3221 internal_fn ifn
= IFN_LAST
;
3222 tree callee
= gimple_call_fndecl (stmt
);
3224 /* First try using an internal function. */
3225 tree_code convert_code
= ERROR_MARK
;
3227 && (modifier
== NONE
3228 || (modifier
== NARROW
3229 && simple_integer_narrowing (vectype_out
, vectype_in
,
3231 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3234 /* If that fails, try asking for a target-specific built-in function. */
3235 if (ifn
== IFN_LAST
)
3237 if (cfn
!= CFN_LAST
)
3238 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3239 (cfn
, vectype_out
, vectype_in
);
3241 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3242 (callee
, vectype_out
, vectype_in
);
3245 if (ifn
== IFN_LAST
&& !fndecl
)
3247 if (cfn
== CFN_GOMP_SIMD_LANE
3250 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3251 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3252 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3253 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3255 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3256 { 0, 1, 2, ... vf - 1 } vector. */
3257 gcc_assert (nargs
== 0);
3259 else if (modifier
== NONE
3260 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3261 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3262 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3263 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3264 vectype_in
, cost_vec
);
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3269 "function is not vectorizable.\n");
3276 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3277 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3279 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3281 /* Sanity check: make sure that at least one copy of the vectorized stmt
3282 needs to be generated. */
3283 gcc_assert (ncopies
>= 1);
3285 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3286 if (!vec_stmt
) /* transformation not required. */
3288 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3289 DUMP_VECT_SCOPE ("vectorizable_call");
3290 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3291 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3292 record_stmt_cost (cost_vec
, ncopies
/ 2,
3293 vec_promote_demote
, stmt_info
, 0, vect_body
);
3295 if (loop_vinfo
&& mask_opno
>= 0)
3297 unsigned int nvectors
= (slp_node
3298 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3300 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
);
3307 if (dump_enabled_p ())
3308 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3311 scalar_dest
= gimple_call_lhs (stmt
);
3312 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3314 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3316 stmt_vec_info new_stmt_info
= NULL
;
3317 prev_stmt_info
= NULL
;
3318 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3320 tree prev_res
= NULL_TREE
;
3321 vargs
.safe_grow (nargs
);
3322 orig_vargs
.safe_grow (nargs
);
3323 for (j
= 0; j
< ncopies
; ++j
)
3325 /* Build argument list for the vectorized call. */
3328 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3329 vec
<tree
> vec_oprnds0
;
3331 for (i
= 0; i
< nargs
; i
++)
3332 vargs
[i
] = gimple_call_arg (stmt
, i
);
3333 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3334 vec_oprnds0
= vec_defs
[0];
3336 /* Arguments are ready. Create the new vector stmt. */
3337 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3340 for (k
= 0; k
< nargs
; k
++)
3342 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3343 vargs
[k
] = vec_oprndsk
[i
];
3345 if (modifier
== NARROW
)
3347 /* We don't define any narrowing conditional functions
3349 gcc_assert (mask_opno
< 0);
3350 tree half_res
= make_ssa_name (vectype_in
);
3352 = gimple_build_call_internal_vec (ifn
, vargs
);
3353 gimple_call_set_lhs (call
, half_res
);
3354 gimple_call_set_nothrow (call
, true);
3356 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3359 prev_res
= half_res
;
3362 new_temp
= make_ssa_name (vec_dest
);
3364 = gimple_build_assign (new_temp
, convert_code
,
3365 prev_res
, half_res
);
3367 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3372 if (mask_opno
>= 0 && masked_loop_p
)
3374 unsigned int vec_num
= vec_oprnds0
.length ();
3375 /* Always true for SLP. */
3376 gcc_assert (ncopies
== 1);
3377 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3379 vargs
[mask_opno
] = prepare_load_store_mask
3380 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3384 if (ifn
!= IFN_LAST
)
3385 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3387 call
= gimple_build_call_vec (fndecl
, vargs
);
3388 new_temp
= make_ssa_name (vec_dest
, call
);
3389 gimple_call_set_lhs (call
, new_temp
);
3390 gimple_call_set_nothrow (call
, true);
3392 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3394 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3397 for (i
= 0; i
< nargs
; i
++)
3399 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3400 vec_oprndsi
.release ();
3405 for (i
= 0; i
< nargs
; i
++)
3407 op
= gimple_call_arg (stmt
, i
);
3410 = vect_get_vec_def_for_operand (op
, stmt_info
);
3413 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3415 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3418 if (mask_opno
>= 0 && masked_loop_p
)
3420 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3423 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3424 vargs
[mask_opno
], gsi
);
3427 if (cfn
== CFN_GOMP_SIMD_LANE
)
3429 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3431 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3432 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3433 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3434 new_temp
= make_ssa_name (vec_dest
);
3435 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3437 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3439 else if (modifier
== NARROW
)
3441 /* We don't define any narrowing conditional functions at
3443 gcc_assert (mask_opno
< 0);
3444 tree half_res
= make_ssa_name (vectype_in
);
3445 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3446 gimple_call_set_lhs (call
, half_res
);
3447 gimple_call_set_nothrow (call
, true);
3449 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3452 prev_res
= half_res
;
3455 new_temp
= make_ssa_name (vec_dest
);
3456 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3457 prev_res
, half_res
);
3459 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3464 if (ifn
!= IFN_LAST
)
3465 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3467 call
= gimple_build_call_vec (fndecl
, vargs
);
3468 new_temp
= make_ssa_name (vec_dest
, call
);
3469 gimple_call_set_lhs (call
, new_temp
);
3470 gimple_call_set_nothrow (call
, true);
3472 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3475 if (j
== (modifier
== NARROW
? 1 : 0))
3476 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3478 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3480 prev_stmt_info
= new_stmt_info
;
3483 else if (modifier
== NARROW
)
3485 /* We don't define any narrowing conditional functions at present. */
3486 gcc_assert (mask_opno
< 0);
3487 for (j
= 0; j
< ncopies
; ++j
)
3489 /* Build argument list for the vectorized call. */
3491 vargs
.create (nargs
* 2);
3497 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3498 vec
<tree
> vec_oprnds0
;
3500 for (i
= 0; i
< nargs
; i
++)
3501 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3502 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3503 vec_oprnds0
= vec_defs
[0];
3505 /* Arguments are ready. Create the new vector stmt. */
3506 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3510 for (k
= 0; k
< nargs
; k
++)
3512 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3513 vargs
.quick_push (vec_oprndsk
[i
]);
3514 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3517 if (ifn
!= IFN_LAST
)
3518 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3520 call
= gimple_build_call_vec (fndecl
, vargs
);
3521 new_temp
= make_ssa_name (vec_dest
, call
);
3522 gimple_call_set_lhs (call
, new_temp
);
3523 gimple_call_set_nothrow (call
, true);
3525 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3526 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3529 for (i
= 0; i
< nargs
; i
++)
3531 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3532 vec_oprndsi
.release ();
3537 for (i
= 0; i
< nargs
; i
++)
3539 op
= gimple_call_arg (stmt
, i
);
3543 = vect_get_vec_def_for_operand (op
, stmt_info
);
3545 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3549 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3552 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3554 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3557 vargs
.quick_push (vec_oprnd0
);
3558 vargs
.quick_push (vec_oprnd1
);
3561 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3562 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3563 gimple_call_set_lhs (new_stmt
, new_temp
);
3565 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3568 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3570 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3572 prev_stmt_info
= new_stmt_info
;
3575 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3578 /* No current target implements this case. */
3583 /* The call in STMT might prevent it from being removed in dce.
3584 We however cannot remove it here, due to the way the ssa name
3585 it defines is mapped to the new definition. So just replace
3586 rhs of the statement with something harmless. */
3591 stmt_info
= vect_orig_stmt (stmt_info
);
3592 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3595 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3596 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3602 struct simd_call_arg_info
3606 HOST_WIDE_INT linear_step
;
3607 enum vect_def_type dt
;
3609 bool simd_lane_linear
;
3612 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3613 is linear within simd lane (but not within whole loop), note it in
3617 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3618 struct simd_call_arg_info
*arginfo
)
3620 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3622 if (!is_gimple_assign (def_stmt
)
3623 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3624 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3627 tree base
= gimple_assign_rhs1 (def_stmt
);
3628 HOST_WIDE_INT linear_step
= 0;
3629 tree v
= gimple_assign_rhs2 (def_stmt
);
3630 while (TREE_CODE (v
) == SSA_NAME
)
3633 def_stmt
= SSA_NAME_DEF_STMT (v
);
3634 if (is_gimple_assign (def_stmt
))
3635 switch (gimple_assign_rhs_code (def_stmt
))
3638 t
= gimple_assign_rhs2 (def_stmt
);
3639 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3641 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3642 v
= gimple_assign_rhs1 (def_stmt
);
3645 t
= gimple_assign_rhs2 (def_stmt
);
3646 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3648 linear_step
= tree_to_shwi (t
);
3649 v
= gimple_assign_rhs1 (def_stmt
);
3652 t
= gimple_assign_rhs1 (def_stmt
);
3653 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3654 || (TYPE_PRECISION (TREE_TYPE (v
))
3655 < TYPE_PRECISION (TREE_TYPE (t
))))
3664 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3666 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3667 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3672 arginfo
->linear_step
= linear_step
;
3674 arginfo
->simd_lane_linear
= true;
3680 /* Return the number of elements in vector type VECTYPE, which is associated
3681 with a SIMD clone. At present these vectors always have a constant
3684 static unsigned HOST_WIDE_INT
3685 simd_clone_subparts (tree vectype
)
3687 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3690 /* Function vectorizable_simd_clone_call.
3692 Check if STMT_INFO performs a function call that can be vectorized
3693 by calling a simd clone of the function.
3694 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3695 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3696 Return true if STMT_INFO is vectorizable in this way. */
3699 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3700 gimple_stmt_iterator
*gsi
,
3701 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3702 stmt_vector_for_cost
*)
3707 tree vec_oprnd0
= NULL_TREE
;
3708 stmt_vec_info prev_stmt_info
;
3710 unsigned int nunits
;
3711 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3712 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3713 vec_info
*vinfo
= stmt_info
->vinfo
;
3714 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3715 tree fndecl
, new_temp
;
3717 auto_vec
<simd_call_arg_info
> arginfo
;
3718 vec
<tree
> vargs
= vNULL
;
3720 tree lhs
, rtype
, ratype
;
3721 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3723 /* Is STMT a vectorizable call? */
3724 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3728 fndecl
= gimple_call_fndecl (stmt
);
3729 if (fndecl
== NULL_TREE
)
3732 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3733 if (node
== NULL
|| node
->simd_clones
== NULL
)
3736 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3739 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3743 if (gimple_call_lhs (stmt
)
3744 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3747 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3749 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3751 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3758 /* Process function arguments. */
3759 nargs
= gimple_call_num_args (stmt
);
3761 /* Bail out if the function has zero arguments. */
3765 arginfo
.reserve (nargs
, true);
3767 for (i
= 0; i
< nargs
; i
++)
3769 simd_call_arg_info thisarginfo
;
3772 thisarginfo
.linear_step
= 0;
3773 thisarginfo
.align
= 0;
3774 thisarginfo
.op
= NULL_TREE
;
3775 thisarginfo
.simd_lane_linear
= false;
3777 op
= gimple_call_arg (stmt
, i
);
3778 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3779 &thisarginfo
.vectype
)
3780 || thisarginfo
.dt
== vect_uninitialized_def
)
3782 if (dump_enabled_p ())
3783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3784 "use not simple.\n");
3788 if (thisarginfo
.dt
== vect_constant_def
3789 || thisarginfo
.dt
== vect_external_def
)
3790 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3792 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3794 /* For linear arguments, the analyze phase should have saved
3795 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3796 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3797 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3799 gcc_assert (vec_stmt
);
3800 thisarginfo
.linear_step
3801 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3803 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3804 thisarginfo
.simd_lane_linear
3805 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3806 == boolean_true_node
);
3807 /* If loop has been peeled for alignment, we need to adjust it. */
3808 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3809 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3810 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3812 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3813 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3814 tree opt
= TREE_TYPE (thisarginfo
.op
);
3815 bias
= fold_convert (TREE_TYPE (step
), bias
);
3816 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3818 = fold_build2 (POINTER_TYPE_P (opt
)
3819 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3820 thisarginfo
.op
, bias
);
3824 && thisarginfo
.dt
!= vect_constant_def
3825 && thisarginfo
.dt
!= vect_external_def
3827 && TREE_CODE (op
) == SSA_NAME
3828 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3830 && tree_fits_shwi_p (iv
.step
))
3832 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3833 thisarginfo
.op
= iv
.base
;
3835 else if ((thisarginfo
.dt
== vect_constant_def
3836 || thisarginfo
.dt
== vect_external_def
)
3837 && POINTER_TYPE_P (TREE_TYPE (op
)))
3838 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3839 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3841 if (POINTER_TYPE_P (TREE_TYPE (op
))
3842 && !thisarginfo
.linear_step
3844 && thisarginfo
.dt
!= vect_constant_def
3845 && thisarginfo
.dt
!= vect_external_def
3848 && TREE_CODE (op
) == SSA_NAME
)
3849 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3851 arginfo
.quick_push (thisarginfo
);
3854 unsigned HOST_WIDE_INT vf
;
3855 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3857 if (dump_enabled_p ())
3858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3859 "not considering SIMD clones; not yet supported"
3860 " for variable-width vectors.\n");
3864 unsigned int badness
= 0;
3865 struct cgraph_node
*bestn
= NULL
;
3866 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3867 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3869 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3870 n
= n
->simdclone
->next_clone
)
3872 unsigned int this_badness
= 0;
3873 if (n
->simdclone
->simdlen
> vf
3874 || n
->simdclone
->nargs
!= nargs
)
3876 if (n
->simdclone
->simdlen
< vf
)
3877 this_badness
+= (exact_log2 (vf
)
3878 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3879 if (n
->simdclone
->inbranch
)
3880 this_badness
+= 2048;
3881 int target_badness
= targetm
.simd_clone
.usable (n
);
3882 if (target_badness
< 0)
3884 this_badness
+= target_badness
* 512;
3885 /* FORNOW: Have to add code to add the mask argument. */
3886 if (n
->simdclone
->inbranch
)
3888 for (i
= 0; i
< nargs
; i
++)
3890 switch (n
->simdclone
->args
[i
].arg_type
)
3892 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3893 if (!useless_type_conversion_p
3894 (n
->simdclone
->args
[i
].orig_type
,
3895 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3897 else if (arginfo
[i
].dt
== vect_constant_def
3898 || arginfo
[i
].dt
== vect_external_def
3899 || arginfo
[i
].linear_step
)
3902 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3903 if (arginfo
[i
].dt
!= vect_constant_def
3904 && arginfo
[i
].dt
!= vect_external_def
)
3907 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3908 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3909 if (arginfo
[i
].dt
== vect_constant_def
3910 || arginfo
[i
].dt
== vect_external_def
3911 || (arginfo
[i
].linear_step
3912 != n
->simdclone
->args
[i
].linear_step
))
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3918 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3919 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3920 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3924 case SIMD_CLONE_ARG_TYPE_MASK
:
3927 if (i
== (size_t) -1)
3929 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3934 if (arginfo
[i
].align
)
3935 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3936 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3938 if (i
== (size_t) -1)
3940 if (bestn
== NULL
|| this_badness
< badness
)
3943 badness
= this_badness
;
3950 for (i
= 0; i
< nargs
; i
++)
3951 if ((arginfo
[i
].dt
== vect_constant_def
3952 || arginfo
[i
].dt
== vect_external_def
)
3953 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3956 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3958 if (arginfo
[i
].vectype
== NULL
3959 || (simd_clone_subparts (arginfo
[i
].vectype
)
3960 > bestn
->simdclone
->simdlen
))
3964 fndecl
= bestn
->decl
;
3965 nunits
= bestn
->simdclone
->simdlen
;
3966 ncopies
= vf
/ nunits
;
3968 /* If the function isn't const, only allow it in simd loops where user
3969 has asserted that at least nunits consecutive iterations can be
3970 performed using SIMD instructions. */
3971 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3972 && gimple_vuse (stmt
))
3975 /* Sanity check: make sure that at least one copy of the vectorized stmt
3976 needs to be generated. */
3977 gcc_assert (ncopies
>= 1);
3979 if (!vec_stmt
) /* transformation not required. */
3981 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3982 for (i
= 0; i
< nargs
; i
++)
3983 if ((bestn
->simdclone
->args
[i
].arg_type
3984 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3985 || (bestn
->simdclone
->args
[i
].arg_type
3986 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3988 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3991 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3992 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3993 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3995 tree sll
= arginfo
[i
].simd_lane_linear
3996 ? boolean_true_node
: boolean_false_node
;
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3999 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4000 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4001 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4007 if (dump_enabled_p ())
4008 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4011 scalar_dest
= gimple_call_lhs (stmt
);
4012 vec_dest
= NULL_TREE
;
4017 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4018 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4019 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4022 rtype
= TREE_TYPE (ratype
);
4026 prev_stmt_info
= NULL
;
4027 for (j
= 0; j
< ncopies
; ++j
)
4029 /* Build argument list for the vectorized call. */
4031 vargs
.create (nargs
);
4035 for (i
= 0; i
< nargs
; i
++)
4037 unsigned int k
, l
, m
, o
;
4039 op
= gimple_call_arg (stmt
, i
);
4040 switch (bestn
->simdclone
->args
[i
].arg_type
)
4042 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4043 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4044 o
= nunits
/ simd_clone_subparts (atype
);
4045 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4047 if (simd_clone_subparts (atype
)
4048 < simd_clone_subparts (arginfo
[i
].vectype
))
4050 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4051 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4052 / simd_clone_subparts (atype
));
4053 gcc_assert ((k
& (k
- 1)) == 0);
4056 = vect_get_vec_def_for_operand (op
, stmt_info
);
4059 vec_oprnd0
= arginfo
[i
].op
;
4060 if ((m
& (k
- 1)) == 0)
4062 = vect_get_vec_def_for_stmt_copy (vinfo
,
4065 arginfo
[i
].op
= vec_oprnd0
;
4067 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4069 bitsize_int ((m
& (k
- 1)) * prec
));
4071 = gimple_build_assign (make_ssa_name (atype
),
4073 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4074 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4078 k
= (simd_clone_subparts (atype
)
4079 / simd_clone_subparts (arginfo
[i
].vectype
));
4080 gcc_assert ((k
& (k
- 1)) == 0);
4081 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4083 vec_alloc (ctor_elts
, k
);
4086 for (l
= 0; l
< k
; l
++)
4088 if (m
== 0 && l
== 0)
4090 = vect_get_vec_def_for_operand (op
, stmt_info
);
4093 = vect_get_vec_def_for_stmt_copy (vinfo
,
4095 arginfo
[i
].op
= vec_oprnd0
;
4098 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4102 vargs
.safe_push (vec_oprnd0
);
4105 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4107 = gimple_build_assign (make_ssa_name (atype
),
4109 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4111 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4116 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4117 vargs
.safe_push (op
);
4119 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4120 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4125 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4130 edge pe
= loop_preheader_edge (loop
);
4131 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4132 gcc_assert (!new_bb
);
4134 if (arginfo
[i
].simd_lane_linear
)
4136 vargs
.safe_push (arginfo
[i
].op
);
4139 tree phi_res
= copy_ssa_name (op
);
4140 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4141 loop_vinfo
->add_stmt (new_phi
);
4142 add_phi_arg (new_phi
, arginfo
[i
].op
,
4143 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4145 = POINTER_TYPE_P (TREE_TYPE (op
))
4146 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4147 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4148 ? sizetype
: TREE_TYPE (op
);
4150 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4152 tree tcst
= wide_int_to_tree (type
, cst
);
4153 tree phi_arg
= copy_ssa_name (op
);
4155 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4156 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4157 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4158 loop_vinfo
->add_stmt (new_stmt
);
4159 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4161 arginfo
[i
].op
= phi_res
;
4162 vargs
.safe_push (phi_res
);
4167 = POINTER_TYPE_P (TREE_TYPE (op
))
4168 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4169 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4170 ? sizetype
: TREE_TYPE (op
);
4172 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4174 tree tcst
= wide_int_to_tree (type
, cst
);
4175 new_temp
= make_ssa_name (TREE_TYPE (op
));
4177 = gimple_build_assign (new_temp
, code
,
4178 arginfo
[i
].op
, tcst
);
4179 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4180 vargs
.safe_push (new_temp
);
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4188 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4194 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4197 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4199 new_temp
= create_tmp_var (ratype
);
4200 else if (simd_clone_subparts (vectype
)
4201 == simd_clone_subparts (rtype
))
4202 new_temp
= make_ssa_name (vec_dest
, new_call
);
4204 new_temp
= make_ssa_name (rtype
, new_call
);
4205 gimple_call_set_lhs (new_call
, new_temp
);
4207 stmt_vec_info new_stmt_info
4208 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4212 if (simd_clone_subparts (vectype
) < nunits
)
4215 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4216 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4217 k
= nunits
/ simd_clone_subparts (vectype
);
4218 gcc_assert ((k
& (k
- 1)) == 0);
4219 for (l
= 0; l
< k
; l
++)
4224 t
= build_fold_addr_expr (new_temp
);
4225 t
= build2 (MEM_REF
, vectype
, t
,
4226 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4229 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4230 bitsize_int (prec
), bitsize_int (l
* prec
));
4232 = gimple_build_assign (make_ssa_name (vectype
), t
);
4234 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4236 if (j
== 0 && l
== 0)
4237 STMT_VINFO_VEC_STMT (stmt_info
)
4238 = *vec_stmt
= new_stmt_info
;
4240 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4242 prev_stmt_info
= new_stmt_info
;
4246 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4249 else if (simd_clone_subparts (vectype
) > nunits
)
4251 unsigned int k
= (simd_clone_subparts (vectype
)
4252 / simd_clone_subparts (rtype
));
4253 gcc_assert ((k
& (k
- 1)) == 0);
4254 if ((j
& (k
- 1)) == 0)
4255 vec_alloc (ret_ctor_elts
, k
);
4258 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4259 for (m
= 0; m
< o
; m
++)
4261 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4262 size_int (m
), NULL_TREE
, NULL_TREE
);
4264 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4266 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4268 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4269 gimple_assign_lhs (new_stmt
));
4271 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4275 if ((j
& (k
- 1)) != k
- 1)
4277 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4279 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4281 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4283 if ((unsigned) j
== k
- 1)
4284 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4286 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4288 prev_stmt_info
= new_stmt_info
;
4293 tree t
= build_fold_addr_expr (new_temp
);
4294 t
= build2 (MEM_REF
, vectype
, t
,
4295 build_int_cst (TREE_TYPE (t
), 0));
4297 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4299 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4300 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4305 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4307 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4309 prev_stmt_info
= new_stmt_info
;
4314 /* The call in STMT might prevent it from being removed in dce.
4315 We however cannot remove it here, due to the way the ssa name
4316 it defines is mapped to the new definition. So just replace
4317 rhs of the statement with something harmless. */
4325 type
= TREE_TYPE (scalar_dest
);
4326 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4327 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4330 new_stmt
= gimple_build_nop ();
4331 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4332 unlink_stmt_vdef (stmt
);
4338 /* Function vect_gen_widened_results_half
4340 Create a vector stmt whose code, type, number of arguments, and result
4341 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4342 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4343 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4344 needs to be created (DECL is a function-decl of a target-builtin).
4345 STMT_INFO is the original scalar stmt that we are vectorizing. */
4348 vect_gen_widened_results_half (enum tree_code code
,
4350 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4351 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4352 stmt_vec_info stmt_info
)
4357 /* Generate half of the widened result: */
4358 if (code
== CALL_EXPR
)
4360 /* Target specific support */
4361 if (op_type
== binary_op
)
4362 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4364 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4365 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4366 gimple_call_set_lhs (new_stmt
, new_temp
);
4370 /* Generic support */
4371 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4372 if (op_type
!= binary_op
)
4374 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4375 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4376 gimple_assign_set_lhs (new_stmt
, new_temp
);
4378 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4384 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4385 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4386 containing scalar operand), and for the rest we get a copy with
4387 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4388 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4389 The vectors are collected into VEC_OPRNDS. */
4392 vect_get_loop_based_defs (tree
*oprnd
, stmt_vec_info stmt_info
,
4393 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4395 vec_info
*vinfo
= stmt_info
->vinfo
;
4398 /* Get first vector operand. */
4399 /* All the vector operands except the very first one (that is scalar oprnd)
4401 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4402 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4404 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4406 vec_oprnds
->quick_push (vec_oprnd
);
4408 /* Get second vector operand. */
4409 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4410 vec_oprnds
->quick_push (vec_oprnd
);
4414 /* For conversion in multiple steps, continue to get operands
4417 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4418 multi_step_cvt
- 1);
4422 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4423 For multi-step conversions store the resulting vectors and call the function
4427 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4429 stmt_vec_info stmt_info
,
4431 gimple_stmt_iterator
*gsi
,
4432 slp_tree slp_node
, enum tree_code code
,
4433 stmt_vec_info
*prev_stmt_info
)
4436 tree vop0
, vop1
, new_tmp
, vec_dest
;
4438 vec_dest
= vec_dsts
.pop ();
4440 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4442 /* Create demotion operation. */
4443 vop0
= (*vec_oprnds
)[i
];
4444 vop1
= (*vec_oprnds
)[i
+ 1];
4445 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4446 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4447 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4448 stmt_vec_info new_stmt_info
4449 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4452 /* Store the resulting vector for next recursive call. */
4453 (*vec_oprnds
)[i
/2] = new_tmp
;
4456 /* This is the last step of the conversion sequence. Store the
4457 vectors in SLP_NODE or in vector info of the scalar statement
4458 (or in STMT_VINFO_RELATED_STMT chain). */
4460 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4463 if (!*prev_stmt_info
)
4464 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4466 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4468 *prev_stmt_info
= new_stmt_info
;
4473 /* For multi-step demotion operations we first generate demotion operations
4474 from the source type to the intermediate types, and then combine the
4475 results (stored in VEC_OPRNDS) in demotion operation to the destination
4479 /* At each level of recursion we have half of the operands we had at the
4481 vec_oprnds
->truncate ((i
+1)/2);
4482 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4483 stmt_info
, vec_dsts
, gsi
,
4484 slp_node
, VEC_PACK_TRUNC_EXPR
,
4488 vec_dsts
.quick_push (vec_dest
);
4492 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4493 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4494 STMT_INFO. For multi-step conversions store the resulting vectors and
4495 call the function recursively. */
4498 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4499 vec
<tree
> *vec_oprnds1
,
4500 stmt_vec_info stmt_info
, tree vec_dest
,
4501 gimple_stmt_iterator
*gsi
,
4502 enum tree_code code1
,
4503 enum tree_code code2
, tree decl1
,
4504 tree decl2
, int op_type
)
4507 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4508 gimple
*new_stmt1
, *new_stmt2
;
4509 vec
<tree
> vec_tmp
= vNULL
;
4511 vec_tmp
.create (vec_oprnds0
->length () * 2);
4512 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4514 if (op_type
== binary_op
)
4515 vop1
= (*vec_oprnds1
)[i
];
4519 /* Generate the two halves of promotion operation. */
4520 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4521 op_type
, vec_dest
, gsi
,
4523 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4524 op_type
, vec_dest
, gsi
,
4526 if (is_gimple_call (new_stmt1
))
4528 new_tmp1
= gimple_call_lhs (new_stmt1
);
4529 new_tmp2
= gimple_call_lhs (new_stmt2
);
4533 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4534 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4537 /* Store the results for the next step. */
4538 vec_tmp
.quick_push (new_tmp1
);
4539 vec_tmp
.quick_push (new_tmp2
);
4542 vec_oprnds0
->release ();
4543 *vec_oprnds0
= vec_tmp
;
4547 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4548 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4549 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4550 Return true if STMT_INFO is vectorizable in this way. */
4553 vectorizable_conversion (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4554 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4555 stmt_vector_for_cost
*cost_vec
)
4559 tree op0
, op1
= NULL_TREE
;
4560 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4561 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4562 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4563 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4564 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4566 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4568 stmt_vec_info prev_stmt_info
;
4569 poly_uint64 nunits_in
;
4570 poly_uint64 nunits_out
;
4571 tree vectype_out
, vectype_in
;
4573 tree lhs_type
, rhs_type
;
4574 enum { NARROW
, NONE
, WIDEN
} modifier
;
4575 vec
<tree
> vec_oprnds0
= vNULL
;
4576 vec
<tree
> vec_oprnds1
= vNULL
;
4578 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4579 vec_info
*vinfo
= stmt_info
->vinfo
;
4580 int multi_step_cvt
= 0;
4581 vec
<tree
> interm_types
= vNULL
;
4582 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4584 unsigned short fltsz
;
4586 /* Is STMT a vectorizable conversion? */
4588 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4591 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4595 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4599 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4602 code
= gimple_assign_rhs_code (stmt
);
4603 if (!CONVERT_EXPR_CODE_P (code
)
4604 && code
!= FIX_TRUNC_EXPR
4605 && code
!= FLOAT_EXPR
4606 && code
!= WIDEN_MULT_EXPR
4607 && code
!= WIDEN_LSHIFT_EXPR
)
4610 op_type
= TREE_CODE_LENGTH (code
);
4612 /* Check types of lhs and rhs. */
4613 scalar_dest
= gimple_assign_lhs (stmt
);
4614 lhs_type
= TREE_TYPE (scalar_dest
);
4615 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4617 op0
= gimple_assign_rhs1 (stmt
);
4618 rhs_type
= TREE_TYPE (op0
);
4620 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4621 && !((INTEGRAL_TYPE_P (lhs_type
)
4622 && INTEGRAL_TYPE_P (rhs_type
))
4623 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4624 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4627 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4628 && ((INTEGRAL_TYPE_P (lhs_type
)
4629 && !type_has_mode_precision_p (lhs_type
))
4630 || (INTEGRAL_TYPE_P (rhs_type
)
4631 && !type_has_mode_precision_p (rhs_type
))))
4633 if (dump_enabled_p ())
4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4635 "type conversion to/from bit-precision unsupported."
4640 /* Check the operands of the operation. */
4641 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4645 "use not simple.\n");
4648 if (op_type
== binary_op
)
4652 op1
= gimple_assign_rhs2 (stmt
);
4653 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4654 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4656 if (CONSTANT_CLASS_P (op0
))
4657 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4659 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4663 if (dump_enabled_p ())
4664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4665 "use not simple.\n");
4670 /* If op0 is an external or constant defs use a vector type of
4671 the same size as the output vector type. */
4673 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4675 gcc_assert (vectype_in
);
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4680 "no vectype for scalar type %T\n", rhs_type
);
4685 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4686 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4690 "can't convert between boolean and non "
4691 "boolean vectors %T\n", rhs_type
);
4696 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4697 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4698 if (known_eq (nunits_out
, nunits_in
))
4700 else if (multiple_p (nunits_out
, nunits_in
))
4704 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4713 else if (modifier
== NARROW
)
4714 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4716 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4718 /* Sanity check: make sure that at least one copy of the vectorized stmt
4719 needs to be generated. */
4720 gcc_assert (ncopies
>= 1);
4722 bool found_mode
= false;
4723 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4724 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4725 opt_scalar_mode rhs_mode_iter
;
4727 /* Supportable by target? */
4731 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4733 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4740 "conversion not supported by target.\n");
4744 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4745 vectype_in
, &code1
, &code2
,
4746 &multi_step_cvt
, &interm_types
))
4748 /* Binary widening operation can only be supported directly by the
4750 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4754 if (code
!= FLOAT_EXPR
4755 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4758 fltsz
= GET_MODE_SIZE (lhs_mode
);
4759 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4761 rhs_mode
= rhs_mode_iter
.require ();
4762 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4766 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4767 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4768 if (cvt_type
== NULL_TREE
)
4771 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4773 if (!supportable_convert_operation (code
, vectype_out
,
4774 cvt_type
, &decl1
, &codecvt1
))
4777 else if (!supportable_widening_operation (code
, stmt_info
,
4778 vectype_out
, cvt_type
,
4779 &codecvt1
, &codecvt2
,
4784 gcc_assert (multi_step_cvt
== 0);
4786 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4787 vectype_in
, &code1
, &code2
,
4788 &multi_step_cvt
, &interm_types
))
4798 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4799 codecvt2
= ERROR_MARK
;
4803 interm_types
.safe_push (cvt_type
);
4804 cvt_type
= NULL_TREE
;
4809 gcc_assert (op_type
== unary_op
);
4810 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4811 &code1
, &multi_step_cvt
,
4815 if (code
!= FIX_TRUNC_EXPR
4816 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4820 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4821 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4822 if (cvt_type
== NULL_TREE
)
4824 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4827 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4828 &code1
, &multi_step_cvt
,
4837 if (!vec_stmt
) /* transformation not required. */
4839 DUMP_VECT_SCOPE ("vectorizable_conversion");
4840 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4842 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4843 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4846 else if (modifier
== NARROW
)
4848 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4849 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4854 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4855 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4858 interm_types
.release ();
4863 if (dump_enabled_p ())
4864 dump_printf_loc (MSG_NOTE
, vect_location
,
4865 "transform conversion. ncopies = %d.\n", ncopies
);
4867 if (op_type
== binary_op
)
4869 if (CONSTANT_CLASS_P (op0
))
4870 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4871 else if (CONSTANT_CLASS_P (op1
))
4872 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4875 /* In case of multi-step conversion, we first generate conversion operations
4876 to the intermediate types, and then from that types to the final one.
4877 We create vector destinations for the intermediate type (TYPES) received
4878 from supportable_*_operation, and store them in the correct order
4879 for future use in vect_create_vectorized_*_stmts (). */
4880 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4881 vec_dest
= vect_create_destination_var (scalar_dest
,
4882 (cvt_type
&& modifier
== WIDEN
)
4883 ? cvt_type
: vectype_out
);
4884 vec_dsts
.quick_push (vec_dest
);
4888 for (i
= interm_types
.length () - 1;
4889 interm_types
.iterate (i
, &intermediate_type
); i
--)
4891 vec_dest
= vect_create_destination_var (scalar_dest
,
4893 vec_dsts
.quick_push (vec_dest
);
4898 vec_dest
= vect_create_destination_var (scalar_dest
,
4900 ? vectype_out
: cvt_type
);
4904 if (modifier
== WIDEN
)
4906 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4907 if (op_type
== binary_op
)
4908 vec_oprnds1
.create (1);
4910 else if (modifier
== NARROW
)
4911 vec_oprnds0
.create (
4912 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4914 else if (code
== WIDEN_LSHIFT_EXPR
)
4915 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4918 prev_stmt_info
= NULL
;
4922 for (j
= 0; j
< ncopies
; j
++)
4925 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
4928 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
4930 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4932 stmt_vec_info new_stmt_info
;
4933 /* Arguments are ready, create the new vector stmt. */
4934 if (code1
== CALL_EXPR
)
4936 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4937 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4938 gimple_call_set_lhs (new_stmt
, new_temp
);
4940 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4944 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4946 = gimple_build_assign (vec_dest
, code1
, vop0
);
4947 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4948 gimple_assign_set_lhs (new_stmt
, new_temp
);
4950 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4954 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4957 if (!prev_stmt_info
)
4958 STMT_VINFO_VEC_STMT (stmt_info
)
4959 = *vec_stmt
= new_stmt_info
;
4961 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4962 prev_stmt_info
= new_stmt_info
;
4969 /* In case the vectorization factor (VF) is bigger than the number
4970 of elements that we can fit in a vectype (nunits), we have to
4971 generate more than one vector stmt - i.e - we need to "unroll"
4972 the vector stmt by a factor VF/nunits. */
4973 for (j
= 0; j
< ncopies
; j
++)
4980 if (code
== WIDEN_LSHIFT_EXPR
)
4985 /* Store vec_oprnd1 for every vector stmt to be created
4986 for SLP_NODE. We check during the analysis that all
4987 the shift arguments are the same. */
4988 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4989 vec_oprnds1
.quick_push (vec_oprnd1
);
4991 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
4992 &vec_oprnds0
, NULL
, slp_node
);
4995 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
4996 &vec_oprnds1
, slp_node
);
5000 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5001 vec_oprnds0
.quick_push (vec_oprnd0
);
5002 if (op_type
== binary_op
)
5004 if (code
== WIDEN_LSHIFT_EXPR
)
5008 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5009 vec_oprnds1
.quick_push (vec_oprnd1
);
5015 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5016 vec_oprnds0
.truncate (0);
5017 vec_oprnds0
.quick_push (vec_oprnd0
);
5018 if (op_type
== binary_op
)
5020 if (code
== WIDEN_LSHIFT_EXPR
)
5023 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5025 vec_oprnds1
.truncate (0);
5026 vec_oprnds1
.quick_push (vec_oprnd1
);
5030 /* Arguments are ready. Create the new vector stmts. */
5031 for (i
= multi_step_cvt
; i
>= 0; i
--)
5033 tree this_dest
= vec_dsts
[i
];
5034 enum tree_code c1
= code1
, c2
= code2
;
5035 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5040 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5041 &vec_oprnds1
, stmt_info
,
5043 c1
, c2
, decl1
, decl2
,
5047 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5049 stmt_vec_info new_stmt_info
;
5052 if (codecvt1
== CALL_EXPR
)
5054 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5055 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5056 gimple_call_set_lhs (new_stmt
, new_temp
);
5058 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5063 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5064 new_temp
= make_ssa_name (vec_dest
);
5066 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5068 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5073 new_stmt_info
= vinfo
->lookup_def (vop0
);
5076 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5079 if (!prev_stmt_info
)
5080 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5082 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5083 prev_stmt_info
= new_stmt_info
;
5088 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5092 /* In case the vectorization factor (VF) is bigger than the number
5093 of elements that we can fit in a vectype (nunits), we have to
5094 generate more than one vector stmt - i.e - we need to "unroll"
5095 the vector stmt by a factor VF/nunits. */
5096 for (j
= 0; j
< ncopies
; j
++)
5100 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5104 vec_oprnds0
.truncate (0);
5105 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5106 vect_pow2 (multi_step_cvt
) - 1);
5109 /* Arguments are ready. Create the new vector stmts. */
5111 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5113 if (codecvt1
== CALL_EXPR
)
5115 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5116 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5117 gimple_call_set_lhs (new_stmt
, new_temp
);
5118 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5122 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5123 new_temp
= make_ssa_name (vec_dest
);
5125 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5126 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5129 vec_oprnds0
[i
] = new_temp
;
5132 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5133 stmt_info
, vec_dsts
, gsi
,
5138 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5142 vec_oprnds0
.release ();
5143 vec_oprnds1
.release ();
5144 interm_types
.release ();
5150 /* Function vectorizable_assignment.
5152 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5153 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5154 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5155 Return true if STMT_INFO is vectorizable in this way. */
5158 vectorizable_assignment (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5159 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5160 stmt_vector_for_cost
*cost_vec
)
5165 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5167 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5171 vec
<tree
> vec_oprnds
= vNULL
;
5173 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5174 vec_info
*vinfo
= stmt_info
->vinfo
;
5175 stmt_vec_info prev_stmt_info
= NULL
;
5176 enum tree_code code
;
5179 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5182 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5186 /* Is vectorizable assignment? */
5187 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5191 scalar_dest
= gimple_assign_lhs (stmt
);
5192 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5195 code
= gimple_assign_rhs_code (stmt
);
5196 if (gimple_assign_single_p (stmt
)
5197 || code
== PAREN_EXPR
5198 || CONVERT_EXPR_CODE_P (code
))
5199 op
= gimple_assign_rhs1 (stmt
);
5203 if (code
== VIEW_CONVERT_EXPR
)
5204 op
= TREE_OPERAND (op
, 0);
5206 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5207 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5209 /* Multiple types in SLP are handled by creating the appropriate number of
5210 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5215 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5217 gcc_assert (ncopies
>= 1);
5219 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5221 if (dump_enabled_p ())
5222 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5223 "use not simple.\n");
5227 /* We can handle NOP_EXPR conversions that do not change the number
5228 of elements or the vector size. */
5229 if ((CONVERT_EXPR_CODE_P (code
)
5230 || code
== VIEW_CONVERT_EXPR
)
5232 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5233 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5234 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5237 /* We do not handle bit-precision changes. */
5238 if ((CONVERT_EXPR_CODE_P (code
)
5239 || code
== VIEW_CONVERT_EXPR
)
5240 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5241 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5242 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5243 /* But a conversion that does not change the bit-pattern is ok. */
5244 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5245 > TYPE_PRECISION (TREE_TYPE (op
)))
5246 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5247 /* Conversion between boolean types of different sizes is
5248 a simple assignment in case their vectypes are same
5250 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5251 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5253 if (dump_enabled_p ())
5254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5255 "type conversion to/from bit-precision "
5260 if (!vec_stmt
) /* transformation not required. */
5262 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5263 DUMP_VECT_SCOPE ("vectorizable_assignment");
5264 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5273 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5276 for (j
= 0; j
< ncopies
; j
++)
5280 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5282 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5284 /* Arguments are ready. create the new vector stmt. */
5285 stmt_vec_info new_stmt_info
= NULL
;
5286 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5288 if (CONVERT_EXPR_CODE_P (code
)
5289 || code
== VIEW_CONVERT_EXPR
)
5290 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5291 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5292 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5293 gimple_assign_set_lhs (new_stmt
, new_temp
);
5295 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5297 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5304 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5306 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5308 prev_stmt_info
= new_stmt_info
;
5311 vec_oprnds
.release ();
5316 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5317 either as shift by a scalar or by a vector. */
5320 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
5323 machine_mode vec_mode
;
5328 vectype
= get_vectype_for_scalar_type (scalar_type
);
5332 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5334 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5336 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5338 || (optab_handler (optab
, TYPE_MODE (vectype
))
5339 == CODE_FOR_nothing
))
5343 vec_mode
= TYPE_MODE (vectype
);
5344 icode
= (int) optab_handler (optab
, vec_mode
);
5345 if (icode
== CODE_FOR_nothing
)
5352 /* Function vectorizable_shift.
5354 Check if STMT_INFO performs a shift operation that can be vectorized.
5355 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5356 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5357 Return true if STMT_INFO is vectorizable in this way. */
5360 vectorizable_shift (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5361 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5362 stmt_vector_for_cost
*cost_vec
)
5366 tree op0
, op1
= NULL
;
5367 tree vec_oprnd1
= NULL_TREE
;
5369 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5370 enum tree_code code
;
5371 machine_mode vec_mode
;
5375 machine_mode optab_op2_mode
;
5376 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5378 stmt_vec_info prev_stmt_info
;
5379 poly_uint64 nunits_in
;
5380 poly_uint64 nunits_out
;
5385 vec
<tree
> vec_oprnds0
= vNULL
;
5386 vec
<tree
> vec_oprnds1
= vNULL
;
5389 bool scalar_shift_arg
= true;
5390 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5391 vec_info
*vinfo
= stmt_info
->vinfo
;
5393 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5396 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5397 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5401 /* Is STMT a vectorizable binary/unary operation? */
5402 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5406 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5409 code
= gimple_assign_rhs_code (stmt
);
5411 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5412 || code
== RROTATE_EXPR
))
5415 scalar_dest
= gimple_assign_lhs (stmt
);
5416 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5417 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5421 "bit-precision shifts not supported.\n");
5425 op0
= gimple_assign_rhs1 (stmt
);
5426 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5430 "use not simple.\n");
5433 /* If op0 is an external or constant def use a vector type with
5434 the same size as the output vector type. */
5436 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5438 gcc_assert (vectype
);
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5443 "no vectype for scalar type\n");
5447 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5448 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5449 if (maybe_ne (nunits_out
, nunits_in
))
5452 op1
= gimple_assign_rhs2 (stmt
);
5453 stmt_vec_info op1_def_stmt_info
;
5454 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5455 &op1_def_stmt_info
))
5457 if (dump_enabled_p ())
5458 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5459 "use not simple.\n");
5463 /* Multiple types in SLP are handled by creating the appropriate number of
5464 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5469 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5471 gcc_assert (ncopies
>= 1);
5473 /* Determine whether the shift amount is a vector, or scalar. If the
5474 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5476 if ((dt
[1] == vect_internal_def
5477 || dt
[1] == vect_induction_def
5478 || dt
[1] == vect_nested_cycle
)
5480 scalar_shift_arg
= false;
5481 else if (dt
[1] == vect_constant_def
5482 || dt
[1] == vect_external_def
5483 || dt
[1] == vect_internal_def
)
5485 /* In SLP, need to check whether the shift count is the same,
5486 in loops if it is a constant or invariant, it is always
5490 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5491 stmt_vec_info slpstmt_info
;
5493 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5495 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5496 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5497 scalar_shift_arg
= false;
5501 /* If the shift amount is computed by a pattern stmt we cannot
5502 use the scalar amount directly thus give up and use a vector
5504 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5505 scalar_shift_arg
= false;
5509 if (dump_enabled_p ())
5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5511 "operand mode requires invariant argument.\n");
5515 /* Vector shifted by vector. */
5516 if (!scalar_shift_arg
)
5518 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5519 if (dump_enabled_p ())
5520 dump_printf_loc (MSG_NOTE
, vect_location
,
5521 "vector/vector shift/rotate found.\n");
5524 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5525 if (op1_vectype
== NULL_TREE
5526 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5528 if (dump_enabled_p ())
5529 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5530 "unusable type for last operand in"
5531 " vector/vector shift/rotate.\n");
5535 /* See if the machine has a vector shifted by scalar insn and if not
5536 then see if it has a vector shifted by vector insn. */
5539 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5541 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_NOTE
, vect_location
,
5545 "vector/scalar shift/rotate found.\n");
5549 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5551 && (optab_handler (optab
, TYPE_MODE (vectype
))
5552 != CODE_FOR_nothing
))
5554 scalar_shift_arg
= false;
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_NOTE
, vect_location
,
5558 "vector/vector shift/rotate found.\n");
5560 /* Unlike the other binary operators, shifts/rotates have
5561 the rhs being int, instead of the same type as the lhs,
5562 so make sure the scalar is the right type if we are
5563 dealing with vectors of long long/long/short/char. */
5564 if (dt
[1] == vect_constant_def
)
5565 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5566 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5570 && TYPE_MODE (TREE_TYPE (vectype
))
5571 != TYPE_MODE (TREE_TYPE (op1
)))
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5575 "unusable type for last operand in"
5576 " vector/vector shift/rotate.\n");
5579 if (vec_stmt
&& !slp_node
)
5581 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5582 op1
= vect_init_vector (stmt_info
, op1
,
5583 TREE_TYPE (vectype
), NULL
);
5590 /* Supportable by target? */
5593 if (dump_enabled_p ())
5594 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5598 vec_mode
= TYPE_MODE (vectype
);
5599 icode
= (int) optab_handler (optab
, vec_mode
);
5600 if (icode
== CODE_FOR_nothing
)
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5604 "op not supported by target.\n");
5605 /* Check only during analysis. */
5606 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5608 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_NOTE
, vect_location
,
5612 "proceeding using word mode.\n");
5615 /* Worthwhile without SIMD support? Check only during analysis. */
5617 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5618 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5622 "not worthwhile without SIMD support.\n");
5626 if (!vec_stmt
) /* transformation not required. */
5628 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5629 DUMP_VECT_SCOPE ("vectorizable_shift");
5630 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5636 if (dump_enabled_p ())
5637 dump_printf_loc (MSG_NOTE
, vect_location
,
5638 "transform binary/unary operation.\n");
5641 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5643 prev_stmt_info
= NULL
;
5644 for (j
= 0; j
< ncopies
; j
++)
5649 if (scalar_shift_arg
)
5651 /* Vector shl and shr insn patterns can be defined with scalar
5652 operand 2 (shift operand). In this case, use constant or loop
5653 invariant op1 directly, without extending it to vector mode
5655 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5656 if (!VECTOR_MODE_P (optab_op2_mode
))
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_NOTE
, vect_location
,
5660 "operand 1 using scalar mode.\n");
5662 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5663 vec_oprnds1
.quick_push (vec_oprnd1
);
5666 /* Store vec_oprnd1 for every vector stmt to be created
5667 for SLP_NODE. We check during the analysis that all
5668 the shift arguments are the same.
5669 TODO: Allow different constants for different vector
5670 stmts generated for an SLP instance. */
5671 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5672 vec_oprnds1
.quick_push (vec_oprnd1
);
5677 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5678 (a special case for certain kind of vector shifts); otherwise,
5679 operand 1 should be of a vector type (the usual case). */
5681 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5684 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5688 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5690 /* Arguments are ready. Create the new vector stmt. */
5691 stmt_vec_info new_stmt_info
= NULL
;
5692 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5694 vop1
= vec_oprnds1
[i
];
5695 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5696 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5697 gimple_assign_set_lhs (new_stmt
, new_temp
);
5699 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5701 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5708 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5710 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5711 prev_stmt_info
= new_stmt_info
;
5714 vec_oprnds0
.release ();
5715 vec_oprnds1
.release ();
/* NOTE(review): this region appears to be an extraction-garbled copy of a GCC
   vectorizer source file — statements are split across lines, the original
   file's line numbers are interleaved into the text, and some lines (braces,
   returns, condition fragments) seem to have been dropped.  The comments added
   below annotate only what the visible fragments show; verify everything
   against the upstream file before relying on it.  */
5721 /* Function vectorizable_operation.
5723 Check if STMT_INFO performs a binary, unary or ternary operation that can
5725 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5726 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5727 Return true if STMT_INFO is vectorizable in this way. */
5730 vectorizable_operation (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5731 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5732 stmt_vector_for_cost
*cost_vec
)
5736 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5738 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5739 enum tree_code code
, orig_code
;
5740 machine_mode vec_mode
;
5744 bool target_support_p
;
5745 enum vect_def_type dt
[3]
5746 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5748 stmt_vec_info prev_stmt_info
;
5749 poly_uint64 nunits_in
;
5750 poly_uint64 nunits_out
;
5754 vec
<tree
> vec_oprnds0
= vNULL
;
5755 vec
<tree
> vec_oprnds1
= vNULL
;
5756 vec
<tree
> vec_oprnds2
= vNULL
;
5757 tree vop0
, vop1
, vop2
;
5758 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5759 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Skip stmts that are not marked relevant, except when doing basic-block
   (SLP) vectorization, where BB_VINFO is non-null.  */
5761 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5764 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5768 /* Is STMT a vectorizable binary/unary operation? */
5769 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5773 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5776 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5778 /* For pointer addition and subtraction, we should use the normal
5779 plus and minus for the vector operation. */
5780 if (code
== POINTER_PLUS_EXPR
)
5782 if (code
== POINTER_DIFF_EXPR
)
5785 /* Support only unary or binary operations. */
5786 op_type
= TREE_CODE_LENGTH (code
);
5787 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5791 "num. args = %d (not unary/binary/ternary op).\n",
5796 scalar_dest
= gimple_assign_lhs (stmt
);
5797 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5799 /* Most operations cannot handle bit-precision types without extra
5801 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5802 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5803 /* Exception are bitwise binary operations. */
5804 && code
!= BIT_IOR_EXPR
5805 && code
!= BIT_XOR_EXPR
5806 && code
!= BIT_AND_EXPR
)
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5810 "bit-precision arithmetic not supported.\n");
/* Validate each operand with vect_is_simple_use and record its def type
   in dt[]; the first operand also determines the input vector type.  */
5814 op0
= gimple_assign_rhs1 (stmt
);
5815 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5817 if (dump_enabled_p ())
5818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5819 "use not simple.\n");
5822 /* If op0 is an external or constant def use a vector type with
5823 the same size as the output vector type. */
5826 /* For boolean type we cannot determine vectype by
5827 invariant value (don't know whether it is a vector
5828 of booleans or vector of integers). We use output
5829 vectype because operations on boolean don't change
5831 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5833 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5835 if (dump_enabled_p ())
5836 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5837 "not supported operation on bool value.\n");
5840 vectype
= vectype_out
;
5843 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5846 gcc_assert (vectype
);
5849 if (dump_enabled_p ())
5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5851 "no vectype for scalar type %T\n",
/* Input and output vector types must agree in number of elements.  */
5857 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5858 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5859 if (maybe_ne (nunits_out
, nunits_in
))
5862 if (op_type
== binary_op
|| op_type
== ternary_op
)
5864 op1
= gimple_assign_rhs2 (stmt
);
5865 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1]))
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5869 "use not simple.\n");
5873 if (op_type
== ternary_op
)
5875 op2
= gimple_assign_rhs3 (stmt
);
5876 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2]))
5878 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5880 "use not simple.\n");
5885 /* Multiple types in SLP are handled by creating the appropriate number of
5886 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5891 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
)
;
5893 gcc_assert (ncopies
>= 1);
5895 /* Shifts are handled in vectorizable_shift (). */
5896 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5897 || code
== RROTATE_EXPR
)
5900 /* Supportable by target? */
5902 vec_mode
= TYPE_MODE (vectype
);
5903 if (code
== MULT_HIGHPART_EXPR
)
5904 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5907 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5915 target_support_p
= (optab_handler (optab
, vec_mode
)
5916 != CODE_FOR_nothing
);
5919 if (!target_support_p
)
5921 if (dump_enabled_p ())
5922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5923 "op not supported by target.\n");
5924 /* Check only during analysis. */
5925 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5926 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_NOTE
, vect_location
,
5930 "proceeding using word mode.\n");
5933 /* Worthwhile without SIMD support? Check only during analysis. */
5934 if (!VECTOR_MODE_P (vec_mode
)
5936 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5938 if (dump_enabled_p ())
5939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5940 "not worthwhile without SIMD support.\n");
/* Analysis phase only: record the stmt kind and its cost model entry;
   no code is generated until VEC_STMT is supplied.  */
5944 if (!vec_stmt
) /* transformation not required. */
5946 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5947 DUMP_VECT_SCOPE ("vectorizable_operation");
5948 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_NOTE
, vect_location
,
5956 "transform binary/unary operation.\n");
5958 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5959 vectors with unsigned elements, but the result is signed. So, we
5960 need to compute the MINUS_EXPR into vectype temporary and
5961 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5962 tree vec_cvt_dest
= NULL_TREE
;
5963 if (orig_code
== POINTER_DIFF_EXPR
)
5965 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5966 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5970 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5972 /* In case the vectorization factor (VF) is bigger than the number
5973 of elements that we can fit in a vectype (nunits), we have to generate
5974 more than one vector stmt - i.e - we need to "unroll" the
5975 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5976 from one copy of the vector stmt to the next, in the field
5977 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5978 stages to find the correct vector defs to be used when vectorizing
5979 stmts that use the defs of the current stmt. The example below
5980 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5981 we need to create 4 vectorized stmts):
5983 before vectorization:
5984 RELATED_STMT VEC_STMT
5988 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5990 RELATED_STMT VEC_STMT
5991 VS1_0: vx0 = memref0 VS1_1 -
5992 VS1_1: vx1 = memref1 VS1_2 -
5993 VS1_2: vx2 = memref2 VS1_3 -
5994 VS1_3: vx3 = memref3 - -
5995 S1: x = load - VS1_0
5998 step2: vectorize stmt S2 (done here):
5999 To vectorize stmt S2 we first need to find the relevant vector
6000 def for the first operand 'x'. This is, as usual, obtained from
6001 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6002 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6003 relevant vector def 'vx0'. Having found 'vx0' we can generate
6004 the vector stmt VS2_0, and as usual, record it in the
6005 STMT_VINFO_VEC_STMT of stmt S2.
6006 When creating the second copy (VS2_1), we obtain the relevant vector
6007 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6008 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6009 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6010 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6011 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6012 chain of stmts and pointers:
6013 RELATED_STMT VEC_STMT
6014 VS1_0: vx0 = memref0 VS1_1 -
6015 VS1_1: vx1 = memref1 VS1_2 -
6016 VS1_2: vx2 = memref2 VS1_3 -
6017 VS1_3: vx3 = memref3 - -
6018 S1: x = load - VS1_0
6019 VS2_0: vz0 = vx0 + v1 VS2_1 -
6020 VS2_1: vz1 = vx1 + v1 VS2_2 -
6021 VS2_2: vz2 = vx2 + v1 VS2_3 -
6022 VS2_3: vz3 = vx3 + v1 - -
6023 S2: z = x + 1 - VS2_0 */
/* Transformation phase: create NCOPIES vector stmts, chained together
   through STMT_VINFO_RELATED_STMT as described in the comment above.  */
6025 prev_stmt_info
= NULL
;
6026 for (j
= 0; j
< ncopies
; j
++)
6031 if (op_type
== binary_op
)
6032 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
6034 else if (op_type
== ternary_op
)
6038 auto_vec
<tree
> ops(3);
6039 ops
.quick_push (op0
);
6040 ops
.quick_push (op1
);
6041 ops
.quick_push (op2
);
6042 auto_vec
<vec
<tree
> > vec_defs(3);
6043 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
6044 vec_oprnds0
= vec_defs
[0];
6045 vec_oprnds1
= vec_defs
[1];
6046 vec_oprnds2
= vec_defs
[2];
6050 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
6051 &vec_oprnds1
, NULL
);
6052 vect_get_vec_defs (op2
, NULL_TREE
, stmt_info
, &vec_oprnds2
,
6057 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
6062 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6063 if (op_type
== ternary_op
)
6065 tree vec_oprnd
= vec_oprnds2
.pop ();
6066 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6071 /* Arguments are ready. Create the new vector stmt. */
6072 stmt_vec_info new_stmt_info
= NULL
;
6073 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6075 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6076 ? vec_oprnds1
[i
] : NULL_TREE
);
6077 vop2
= ((op_type
== ternary_op
)
6078 ? vec_oprnds2
[i
] : NULL_TREE
);
6079 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6081 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6082 gimple_assign_set_lhs (new_stmt
, new_temp
);
6084 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6087 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6089 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6091 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6092 gimple_assign_set_lhs (new_stmt
, new_temp
);
6094 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6097 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6104 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6106 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6107 prev_stmt_info
= new_stmt_info
;
/* Free the per-operand vector-def vectors.  */
6110 vec_oprnds0
.release ();
6111 vec_oprnds1
.release ();
6112 vec_oprnds2
.release ();
6117 /* A helper function to ensure data reference DR_INFO's base alignment. */
6120 ensure_base_align (dr_vec_info
*dr_info
)
6122 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6125 if (dr_info
->base_misaligned
)
6127 tree base_decl
= dr_info
->base_decl
;
6129 unsigned int align_base_to
6130 = DR_TARGET_ALIGNMENT (dr_info
) * BITS_PER_UNIT
;
6132 if (decl_in_symtab_p (base_decl
))
6133 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6136 SET_DECL_ALIGN (base_decl
, align_base_to
);
6137 DECL_USER_ALIGN (base_decl
) = 1;
6139 dr_info
->base_misaligned
= false;
6144 /* Function get_group_alias_ptr_type.
6146 Return the alias type for the group starting at FIRST_STMT_INFO. */
6149 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6151 struct data_reference
*first_dr
, *next_dr
;
6153 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6154 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6155 while (next_stmt_info
)
6157 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6158 if (get_alias_set (DR_REF (first_dr
))
6159 != get_alias_set (DR_REF (next_dr
)))
6161 if (dump_enabled_p ())
6162 dump_printf_loc (MSG_NOTE
, vect_location
,
6163 "conflicting alias set types.\n");
6164 return ptr_type_node
;
6166 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6168 return reference_alias_ptr_type (DR_REF (first_dr
));
6172 /* Function vectorizable_store.
6174 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6175 that can be vectorized.
6176 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6178 Return true if STMT_INFO is vectorizable in this way. */
6181 vectorizable_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6182 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
6183 stmt_vector_for_cost
*cost_vec
)
6187 tree vec_oprnd
= NULL_TREE
;
6189 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6190 struct loop
*loop
= NULL
;
6191 machine_mode vec_mode
;
6193 enum dr_alignment_support alignment_support_scheme
;
6194 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6195 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6196 stmt_vec_info prev_stmt_info
= NULL
;
6197 tree dataref_ptr
= NULL_TREE
;
6198 tree dataref_offset
= NULL_TREE
;
6199 gimple
*ptr_incr
= NULL
;
6202 stmt_vec_info first_stmt_info
;
6204 unsigned int group_size
, i
;
6205 vec
<tree
> oprnds
= vNULL
;
6206 vec
<tree
> result_chain
= vNULL
;
6207 tree offset
= NULL_TREE
;
6208 vec
<tree
> vec_oprnds
= vNULL
;
6209 bool slp
= (slp_node
!= NULL
);
6210 unsigned int vec_num
;
6211 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6212 vec_info
*vinfo
= stmt_info
->vinfo
;
6214 gather_scatter_info gs_info
;
6216 vec_load_store_type vls_type
;
6219 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6222 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6226 /* Is vectorizable store? */
6228 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6229 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6231 tree scalar_dest
= gimple_assign_lhs (assign
);
6232 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6233 && is_pattern_stmt_p (stmt_info
))
6234 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6235 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6236 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6237 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6238 && TREE_CODE (scalar_dest
) != COMPONENT_REF
6239 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
6240 && TREE_CODE (scalar_dest
) != REALPART_EXPR
6241 && TREE_CODE (scalar_dest
) != MEM_REF
)
6246 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
6247 if (!call
|| !gimple_call_internal_p (call
))
6250 internal_fn ifn
= gimple_call_internal_fn (call
);
6251 if (!internal_store_fn_p (ifn
))
6254 if (slp_node
!= NULL
)
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6258 "SLP of masked stores not supported.\n");
6262 int mask_index
= internal_fn_mask_index (ifn
);
6263 if (mask_index
>= 0)
6265 mask
= gimple_call_arg (call
, mask_index
);
6266 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
6272 op
= vect_get_store_rhs (stmt_info
);
6274 /* Cannot have hybrid store SLP -- that would mean storing to the
6275 same location twice. */
6276 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
6278 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
6279 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6283 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6284 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6289 /* Multiple types in SLP are handled by creating the appropriate number of
6290 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6295 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6297 gcc_assert (ncopies
>= 1);
6299 /* FORNOW. This restriction should be relaxed. */
6300 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6304 "multiple types in nested loop.\n");
6308 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
6311 elem_type
= TREE_TYPE (vectype
);
6312 vec_mode
= TYPE_MODE (vectype
);
6314 if (!STMT_VINFO_DATA_REF (stmt_info
))
6317 vect_memory_access_type memory_access_type
;
6318 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
6319 &memory_access_type
, &gs_info
))
6324 if (memory_access_type
== VMAT_CONTIGUOUS
)
6326 if (!VECTOR_MODE_P (vec_mode
)
6327 || !can_vec_mask_load_store_p (vec_mode
,
6328 TYPE_MODE (mask_vectype
), false))
6331 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
6332 && (memory_access_type
!= VMAT_GATHER_SCATTER
|| gs_info
.decl
))
6334 if (dump_enabled_p ())
6335 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6336 "unsupported access type for masked store.\n");
6342 /* FORNOW. In some cases can vectorize even if data-type not supported
6343 (e.g. - array initialization with 0). */
6344 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
6348 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
6349 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6350 && memory_access_type
!= VMAT_GATHER_SCATTER
6351 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
6354 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6355 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
6356 group_size
= DR_GROUP_SIZE (first_stmt_info
);
6360 first_stmt_info
= stmt_info
;
6361 first_dr_info
= dr_info
;
6362 group_size
= vec_num
= 1;
6365 if (!vec_stmt
) /* transformation not required. */
6367 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6370 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
6371 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
6372 memory_access_type
, &gs_info
);
6374 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
6375 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
6376 vls_type
, slp_node
, cost_vec
);
6379 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6383 ensure_base_align (dr_info
);
6385 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
6387 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
6388 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6389 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6390 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
6391 edge pe
= loop_preheader_edge (loop
);
6394 enum { NARROW
, NONE
, WIDEN
} modifier
;
6395 poly_uint64 scatter_off_nunits
6396 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6398 if (known_eq (nunits
, scatter_off_nunits
))
6400 else if (known_eq (nunits
* 2, scatter_off_nunits
))
6404 /* Currently gathers and scatters are only supported for
6405 fixed-length vectors. */
6406 unsigned int count
= scatter_off_nunits
.to_constant ();
6407 vec_perm_builder
sel (count
, count
, 1);
6408 for (i
= 0; i
< (unsigned int) count
; ++i
)
6409 sel
.quick_push (i
| (count
/ 2));
6411 vec_perm_indices
indices (sel
, 1, count
);
6412 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6414 gcc_assert (perm_mask
!= NULL_TREE
);
6416 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6420 /* Currently gathers and scatters are only supported for
6421 fixed-length vectors. */
6422 unsigned int count
= nunits
.to_constant ();
6423 vec_perm_builder
sel (count
, count
, 1);
6424 for (i
= 0; i
< (unsigned int) count
; ++i
)
6425 sel
.quick_push (i
| (count
/ 2));
6427 vec_perm_indices
indices (sel
, 2, count
);
6428 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6429 gcc_assert (perm_mask
!= NULL_TREE
);
6435 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6436 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6437 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6438 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6439 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6440 scaletype
= TREE_VALUE (arglist
);
6442 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6443 && TREE_CODE (rettype
) == VOID_TYPE
);
6445 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6446 if (!is_gimple_min_invariant (ptr
))
6448 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6449 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6450 gcc_assert (!new_bb
);
6453 /* Currently we support only unconditional scatter stores,
6454 so mask should be all ones. */
6455 mask
= build_int_cst (masktype
, -1);
6456 mask
= vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
6458 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6460 prev_stmt_info
= NULL
;
6461 for (j
= 0; j
< ncopies
; ++j
)
6466 = vect_get_vec_def_for_operand (op
, stmt_info
);
6468 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt_info
);
6470 else if (modifier
!= NONE
&& (j
& 1))
6472 if (modifier
== WIDEN
)
6475 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
6476 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6479 else if (modifier
== NARROW
)
6481 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6484 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
6492 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
6494 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
6497 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6499 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6500 TYPE_VECTOR_SUBPARTS (srctype
)));
6501 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6502 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6504 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6505 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6509 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6511 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6512 TYPE_VECTOR_SUBPARTS (idxtype
)));
6513 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6514 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6516 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6517 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6522 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
6523 stmt_vec_info new_stmt_info
6524 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6526 if (prev_stmt_info
== NULL
)
6527 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6529 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6530 prev_stmt_info
= new_stmt_info
;
6535 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6536 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
6541 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
6543 /* We vectorize all the stmts of the interleaving group when we
6544 reach the last stmt in the group. */
6545 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
6546 < DR_GROUP_SIZE (first_stmt_info
)
6555 grouped_store
= false;
6556 /* VEC_NUM is the number of vect stmts to be created for this
6558 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6559 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6560 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
6561 == first_stmt_info
);
6562 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
6563 op
= vect_get_store_rhs (first_stmt_info
);
6566 /* VEC_NUM is the number of vect stmts to be created for this
6568 vec_num
= group_size
;
6570 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
6573 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
6575 if (dump_enabled_p ())
6576 dump_printf_loc (MSG_NOTE
, vect_location
,
6577 "transform store. ncopies = %d\n", ncopies
);
6579 if (memory_access_type
== VMAT_ELEMENTWISE
6580 || memory_access_type
== VMAT_STRIDED_SLP
)
6582 gimple_stmt_iterator incr_gsi
;
6588 tree stride_base
, stride_step
, alias_off
;
6591 /* Checked by get_load_store_type. */
6592 unsigned int const_nunits
= nunits
.to_constant ();
6594 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6595 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
6598 = fold_build_pointer_plus
6599 (DR_BASE_ADDRESS (first_dr_info
->dr
),
6600 size_binop (PLUS_EXPR
,
6601 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
6602 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
6603 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
6605 /* For a store with loop-invariant (but other than power-of-2)
6606 stride (i.e. not a grouped access) like so:
6608 for (i = 0; i < n; i += stride)
6611 we generate a new induction variable and new stores from
6612 the components of the (vectorized) rhs:
6614 for (j = 0; ; j += VF*stride)
6619 array[j + stride] = tmp2;
6623 unsigned nstores
= const_nunits
;
6625 tree ltype
= elem_type
;
6626 tree lvectype
= vectype
;
6629 if (group_size
< const_nunits
6630 && const_nunits
% group_size
== 0)
6632 nstores
= const_nunits
/ group_size
;
6634 ltype
= build_vector_type (elem_type
, group_size
);
6637 /* First check if vec_extract optab doesn't support extraction
6638 of vector elts directly. */
6639 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6641 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6642 || !VECTOR_MODE_P (vmode
)
6643 || !targetm
.vector_mode_supported_p (vmode
)
6644 || (convert_optab_handler (vec_extract_optab
,
6645 TYPE_MODE (vectype
), vmode
)
6646 == CODE_FOR_nothing
))
6648 /* Try to avoid emitting an extract of vector elements
6649 by performing the extracts using an integer type of the
6650 same size, extracting from a vector of those and then
6651 re-interpreting it as the original vector type if
6654 = group_size
* GET_MODE_BITSIZE (elmode
);
6655 elmode
= int_mode_for_size (lsize
, 0).require ();
6656 unsigned int lnunits
= const_nunits
/ group_size
;
6657 /* If we can't construct such a vector fall back to
6658 element extracts from the original vector type and
6659 element size stores. */
6660 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6661 && VECTOR_MODE_P (vmode
)
6662 && targetm
.vector_mode_supported_p (vmode
)
6663 && (convert_optab_handler (vec_extract_optab
,
6665 != CODE_FOR_nothing
))
6669 ltype
= build_nonstandard_integer_type (lsize
, 1);
6670 lvectype
= build_vector_type (ltype
, nstores
);
6672 /* Else fall back to vector extraction anyway.
6673 Fewer stores are more important than avoiding spilling
6674 of the vector we extract from. Compared to the
6675 construction case in vectorizable_load no store-forwarding
6676 issue exists here for reasonable archs. */
6679 else if (group_size
>= const_nunits
6680 && group_size
% const_nunits
== 0)
6683 lnel
= const_nunits
;
6687 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6688 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6691 ivstep
= stride_step
;
6692 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6693 build_int_cst (TREE_TYPE (ivstep
), vf
));
6695 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6697 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
6698 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
6699 create_iv (stride_base
, ivstep
, NULL
,
6700 loop
, &incr_gsi
, insert_after
,
6702 incr
= gsi_stmt (incr_gsi
);
6703 loop_vinfo
->add_stmt (incr
);
6705 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
6707 prev_stmt_info
= NULL
;
6708 alias_off
= build_int_cst (ref_type
, 0);
6709 stmt_vec_info next_stmt_info
= first_stmt_info
;
6710 for (g
= 0; g
< group_size
; g
++)
6712 running_off
= offvar
;
6715 tree size
= TYPE_SIZE_UNIT (ltype
);
6716 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6718 tree newoff
= copy_ssa_name (running_off
, NULL
);
6719 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6721 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
6722 running_off
= newoff
;
6724 unsigned int group_el
= 0;
6725 unsigned HOST_WIDE_INT
6726 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6727 for (j
= 0; j
< ncopies
; j
++)
6729 /* We've set op and dt above, from vect_get_store_rhs,
6730 and first_stmt_info == stmt_info. */
6735 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
6736 &vec_oprnds
, NULL
, slp_node
);
6737 vec_oprnd
= vec_oprnds
[0];
6741 op
= vect_get_store_rhs (next_stmt_info
);
6742 vec_oprnd
= vect_get_vec_def_for_operand
6743 (op
, next_stmt_info
);
6749 vec_oprnd
= vec_oprnds
[j
];
6751 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
6754 /* Pun the vector to extract from if necessary. */
6755 if (lvectype
!= vectype
)
6757 tree tem
= make_ssa_name (lvectype
);
6759 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6760 lvectype
, vec_oprnd
));
6761 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
6764 for (i
= 0; i
< nstores
; i
++)
6766 tree newref
, newoff
;
6767 gimple
*incr
, *assign
;
6768 tree size
= TYPE_SIZE (ltype
);
6769 /* Extract the i'th component. */
6770 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6771 bitsize_int (i
), size
);
6772 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6775 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6779 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6781 newref
= build2 (MEM_REF
, ltype
,
6782 running_off
, this_off
);
6783 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
6785 /* And store it to *running_off. */
6786 assign
= gimple_build_assign (newref
, elem
);
6787 stmt_vec_info assign_info
6788 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
6792 || group_el
== group_size
)
6794 newoff
= copy_ssa_name (running_off
, NULL
);
6795 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6796 running_off
, stride_step
);
6797 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
6799 running_off
= newoff
;
6802 if (g
== group_size
- 1
6805 if (j
== 0 && i
== 0)
6806 STMT_VINFO_VEC_STMT (stmt_info
)
6807 = *vec_stmt
= assign_info
;
6809 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
6810 prev_stmt_info
= assign_info
;
6814 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6819 vec_oprnds
.release ();
6823 auto_vec
<tree
> dr_chain (group_size
);
6824 oprnds
.create (group_size
);
6826 alignment_support_scheme
6827 = vect_supportable_dr_alignment (first_dr_info
, false);
6828 gcc_assert (alignment_support_scheme
);
6829 vec_loop_masks
*loop_masks
6830 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6831 ? &LOOP_VINFO_MASKS (loop_vinfo
)
6833 /* Targets with store-lane instructions must not require explicit
6834 realignment. vect_supportable_dr_alignment always returns either
6835 dr_aligned or dr_unaligned_supported for masked operations. */
6836 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6839 || alignment_support_scheme
== dr_aligned
6840 || alignment_support_scheme
== dr_unaligned_supported
);
6842 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6843 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6844 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6847 tree vec_offset
= NULL_TREE
;
6848 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6850 aggr_type
= NULL_TREE
;
6853 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
6855 aggr_type
= elem_type
;
6856 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
6857 &bump
, &vec_offset
);
6861 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6862 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6864 aggr_type
= vectype
;
6865 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
6866 memory_access_type
);
6870 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
6872 /* In case the vectorization factor (VF) is bigger than the number
6873 of elements that we can fit in a vectype (nunits), we have to generate
6874 more than one vector stmt - i.e - we need to "unroll" the
6875 vector stmt by a factor VF/nunits. For more details see documentation in
6876 vect_get_vec_def_for_copy_stmt. */
6878 /* In case of interleaving (non-unit grouped access):
6885 We create vectorized stores starting from base address (the access of the
6886 first stmt in the chain (S2 in the above example), when the last store stmt
6887 of the chain (S4) is reached:
6890 VS2: &base + vec_size*1 = vx0
6891 VS3: &base + vec_size*2 = vx1
6892 VS4: &base + vec_size*3 = vx3
6894 Then permutation statements are generated:
6896 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6897 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6900 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6901 (the order of the data-refs in the output of vect_permute_store_chain
6902 corresponds to the order of scalar stmts in the interleaving chain - see
6903 the documentation of vect_permute_store_chain()).
6905 In case of both multiple types and interleaving, above vector stores and
6906 permutation stmts are created for every copy. The result vector stmts are
6907 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6908 STMT_VINFO_RELATED_STMT for the next copies.
6911 prev_stmt_info
= NULL
;
6912 tree vec_mask
= NULL_TREE
;
6913 for (j
= 0; j
< ncopies
; j
++)
6915 stmt_vec_info new_stmt_info
;
6920 /* Get vectorized arguments for SLP_NODE. */
6921 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
6924 vec_oprnd
= vec_oprnds
[0];
6928 /* For interleaved stores we collect vectorized defs for all the
6929 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6930 used as an input to vect_permute_store_chain(), and OPRNDS as
6931 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6933 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6934 OPRNDS are of size 1. */
6935 stmt_vec_info next_stmt_info
= first_stmt_info
;
6936 for (i
= 0; i
< group_size
; i
++)
6938 /* Since gaps are not supported for interleaved stores,
6939 DR_GROUP_SIZE is the exact number of stmts in the chain.
6940 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
6941 that there is no interleaving, DR_GROUP_SIZE is 1,
6942 and only one iteration of the loop will be executed. */
6943 op
= vect_get_store_rhs (next_stmt_info
);
6944 vec_oprnd
= vect_get_vec_def_for_operand
6945 (op
, next_stmt_info
);
6946 dr_chain
.quick_push (vec_oprnd
);
6947 oprnds
.quick_push (vec_oprnd
);
6948 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6951 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
6955 /* We should have catched mismatched types earlier. */
6956 gcc_assert (useless_type_conversion_p (vectype
,
6957 TREE_TYPE (vec_oprnd
)));
6958 bool simd_lane_access_p
6959 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6960 if (simd_lane_access_p
6961 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
6962 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
6963 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
6964 && integer_zerop (DR_INIT (first_dr_info
->dr
))
6965 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6966 get_alias_set (TREE_TYPE (ref_type
))))
6968 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
6969 dataref_offset
= build_int_cst (ref_type
, 0);
6971 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6972 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
6973 &dataref_ptr
, &vec_offset
);
6976 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
6977 simd_lane_access_p
? loop
: NULL
,
6978 offset
, &dummy
, gsi
, &ptr_incr
,
6979 simd_lane_access_p
, NULL_TREE
, bump
);
6983 /* For interleaved stores we created vectorized defs for all the
6984 defs stored in OPRNDS in the previous iteration (previous copy).
6985 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6986 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6988 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6989 OPRNDS are of size 1. */
6990 for (i
= 0; i
< group_size
; i
++)
6993 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
6994 dr_chain
[i
] = vec_oprnd
;
6995 oprnds
[i
] = vec_oprnd
;
6998 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
7001 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7002 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7003 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
7005 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7009 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7013 /* Get an array into which we can store the individual vectors. */
7014 vec_array
= create_vector_array (vectype
, vec_num
);
7016 /* Invalidate the current contents of VEC_ARRAY. This should
7017 become an RTL clobber too, which prevents the vector registers
7018 from being upward-exposed. */
7019 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
7021 /* Store the individual vectors into the array. */
7022 for (i
= 0; i
< vec_num
; i
++)
7024 vec_oprnd
= dr_chain
[i
];
7025 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
7028 tree final_mask
= NULL
;
7030 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7033 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7040 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7042 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7043 tree alias_ptr
= build_int_cst (ref_type
, align
);
7044 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7045 dataref_ptr
, alias_ptr
,
7046 final_mask
, vec_array
);
7051 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7052 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7053 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7055 gimple_call_set_lhs (call
, data_ref
);
7057 gimple_call_set_nothrow (call
, true);
7058 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7060 /* Record that VEC_ARRAY is now dead. */
7061 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
7065 new_stmt_info
= NULL
;
7069 result_chain
.create (group_size
);
7071 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
7075 stmt_vec_info next_stmt_info
= first_stmt_info
;
7076 for (i
= 0; i
< vec_num
; i
++)
7078 unsigned align
, misalign
;
7080 tree final_mask
= NULL_TREE
;
7082 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
7084 vectype
, vec_num
* j
+ i
);
7086 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7089 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7091 tree scale
= size_int (gs_info
.scale
);
7094 call
= gimple_build_call_internal
7095 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
7096 scale
, vec_oprnd
, final_mask
);
7098 call
= gimple_build_call_internal
7099 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7101 gimple_call_set_nothrow (call
, true);
7103 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7108 /* Bump the vector pointer. */
7109 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7113 vec_oprnd
= vec_oprnds
[i
];
7114 else if (grouped_store
)
7115 /* For grouped stores vectorized defs are interleaved in
7116 vect_permute_store_chain(). */
7117 vec_oprnd
= result_chain
[i
];
7119 align
= DR_TARGET_ALIGNMENT (first_dr_info
);
7120 if (aligned_access_p (first_dr_info
))
7122 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7124 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
7128 misalign
= DR_MISALIGNMENT (first_dr_info
);
7129 if (dataref_offset
== NULL_TREE
7130 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7131 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7134 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7136 tree perm_mask
= perm_mask_for_reverse (vectype
);
7137 tree perm_dest
= vect_create_destination_var
7138 (vect_get_store_rhs (stmt_info
), vectype
);
7139 tree new_temp
= make_ssa_name (perm_dest
);
7141 /* Generate the permute statement. */
7143 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7144 vec_oprnd
, perm_mask
);
7145 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
7147 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7148 vec_oprnd
= new_temp
;
7151 /* Arguments are ready. Create the new vector stmt. */
7154 align
= least_bit_hwi (misalign
| align
);
7155 tree ptr
= build_int_cst (ref_type
, align
);
7157 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7159 final_mask
, vec_oprnd
);
7160 gimple_call_set_nothrow (call
, true);
7162 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7166 data_ref
= fold_build2 (MEM_REF
, vectype
,
7170 : build_int_cst (ref_type
, 0));
7171 if (aligned_access_p (first_dr_info
))
7173 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7174 TREE_TYPE (data_ref
)
7175 = build_aligned_type (TREE_TYPE (data_ref
),
7176 align
* BITS_PER_UNIT
);
7178 TREE_TYPE (data_ref
)
7179 = build_aligned_type (TREE_TYPE (data_ref
),
7180 TYPE_ALIGN (elem_type
));
7181 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
7183 = gimple_build_assign (data_ref
, vec_oprnd
);
7185 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7191 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7192 if (!next_stmt_info
)
7199 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7201 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7202 prev_stmt_info
= new_stmt_info
;
7207 result_chain
.release ();
7208 vec_oprnds
.release ();
7213 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7214 VECTOR_CST mask. No checks are made that the target platform supports the
7215 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7216 vect_gen_perm_mask_checked. */
7219 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
7223 poly_uint64 nunits
= sel
.length ();
7224 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
7226 mask_type
= build_vector_type (ssizetype
, nunits
);
7227 return vec_perm_indices_to_tree (mask_type
, sel
);
7230 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7231 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7234 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
7236 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
7237 return vect_gen_perm_mask_any (vectype
, sel
);
7240 /* Given a vector variable X and Y, that was generated for the scalar
7241 STMT_INFO, generate instructions to permute the vector elements of X and Y
7242 using permutation mask MASK_VEC, insert them at *GSI and return the
7243 permuted vector variable. */
7246 permute_vec_elements (tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
7247 gimple_stmt_iterator
*gsi
)
7249 tree vectype
= TREE_TYPE (x
);
7250 tree perm_dest
, data_ref
;
7253 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
7254 if (TREE_CODE (scalar_dest
) == SSA_NAME
)
7255 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7257 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
7258 data_ref
= make_ssa_name (perm_dest
);
7260 /* Generate the permute statement. */
7261 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
7262 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
7267 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7268 inserting them on the loops preheader edge. Returns true if we
7269 were successful in doing so (and thus STMT_INFO can be moved then),
7270 otherwise returns false. */
7273 hoist_defs_of_uses (stmt_vec_info stmt_info
, struct loop
*loop
)
7279 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
7281 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7282 if (!gimple_nop_p (def_stmt
)
7283 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7285 /* Make sure we don't need to recurse. While we could do
7286 so in simple cases when there are more complex use webs
7287 we don't have an easy way to preserve stmt order to fulfil
7288 dependencies within them. */
7291 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
7293 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
7295 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
7296 if (!gimple_nop_p (def_stmt2
)
7297 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
7307 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
7309 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7310 if (!gimple_nop_p (def_stmt
)
7311 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7313 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
7314 gsi_remove (&gsi
, false);
7315 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
7322 /* vectorizable_load.
7324 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7325 that can be vectorized.
7326 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7327 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7328 Return true if STMT_INFO is vectorizable in this way. */
7331 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7332 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7333 slp_instance slp_node_instance
,
7334 stmt_vector_for_cost
*cost_vec
)
7337 tree vec_dest
= NULL
;
7338 tree data_ref
= NULL
;
7339 stmt_vec_info prev_stmt_info
;
7340 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7341 struct loop
*loop
= NULL
;
7342 struct loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
7343 bool nested_in_vect_loop
= false;
7348 enum dr_alignment_support alignment_support_scheme
;
7349 tree dataref_ptr
= NULL_TREE
;
7350 tree dataref_offset
= NULL_TREE
;
7351 gimple
*ptr_incr
= NULL
;
7354 unsigned int group_size
;
7355 poly_uint64 group_gap_adj
;
7356 tree msq
= NULL_TREE
, lsq
;
7357 tree offset
= NULL_TREE
;
7358 tree byte_offset
= NULL_TREE
;
7359 tree realignment_token
= NULL_TREE
;
7361 vec
<tree
> dr_chain
= vNULL
;
7362 bool grouped_load
= false;
7363 stmt_vec_info first_stmt_info
;
7364 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
7365 bool compute_in_loop
= false;
7366 struct loop
*at_loop
;
7368 bool slp
= (slp_node
!= NULL
);
7369 bool slp_perm
= false;
7370 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7373 gather_scatter_info gs_info
;
7374 vec_info
*vinfo
= stmt_info
->vinfo
;
7376 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7378 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7381 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7385 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7386 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7388 scalar_dest
= gimple_assign_lhs (assign
);
7389 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
7392 tree_code code
= gimple_assign_rhs_code (assign
);
7393 if (code
!= ARRAY_REF
7394 && code
!= BIT_FIELD_REF
7395 && code
!= INDIRECT_REF
7396 && code
!= COMPONENT_REF
7397 && code
!= IMAGPART_EXPR
7398 && code
!= REALPART_EXPR
7400 && TREE_CODE_CLASS (code
) != tcc_declaration
)
7405 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7406 if (!call
|| !gimple_call_internal_p (call
))
7409 internal_fn ifn
= gimple_call_internal_fn (call
);
7410 if (!internal_load_fn_p (ifn
))
7413 scalar_dest
= gimple_call_lhs (call
);
7417 if (slp_node
!= NULL
)
7419 if (dump_enabled_p ())
7420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7421 "SLP of masked loads not supported.\n");
7425 int mask_index
= internal_fn_mask_index (ifn
);
7426 if (mask_index
>= 0)
7428 mask
= gimple_call_arg (call
, mask_index
);
7429 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7435 if (!STMT_VINFO_DATA_REF (stmt_info
))
7438 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7439 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7443 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7444 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
7445 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7450 /* Multiple types in SLP are handled by creating the appropriate number of
7451 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7456 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7458 gcc_assert (ncopies
>= 1);
7460 /* FORNOW. This restriction should be relaxed. */
7461 if (nested_in_vect_loop
&& ncopies
> 1)
7463 if (dump_enabled_p ())
7464 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7465 "multiple types in nested loop.\n");
7469 /* Invalidate assumptions made by dependence analysis when vectorization
7470 on the unrolled body effectively re-orders stmts. */
7472 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7473 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7474 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7478 "cannot perform implicit CSE when unrolling "
7479 "with negative dependence distance\n");
7483 elem_type
= TREE_TYPE (vectype
);
7484 mode
= TYPE_MODE (vectype
);
7486 /* FORNOW. In some cases can vectorize even if data-type not supported
7487 (e.g. - data copies). */
7488 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7490 if (dump_enabled_p ())
7491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7492 "Aligned load, but unsupported type.\n");
7496 /* Check if the load is a part of an interleaving chain. */
7497 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7499 grouped_load
= true;
7501 gcc_assert (!nested_in_vect_loop
);
7502 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7504 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7505 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7507 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7510 /* Invalidate assumptions made by dependence analysis when vectorization
7511 on the unrolled body effectively re-orders stmts. */
7512 if (!PURE_SLP_STMT (stmt_info
)
7513 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7514 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7515 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7517 if (dump_enabled_p ())
7518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7519 "cannot perform implicit CSE when performing "
7520 "group loads with negative dependence distance\n");
7524 /* Similarly when the stmt is a load that is both part of a SLP
7525 instance and a loop vectorized stmt via the same-dr mechanism
7526 we have to give up. */
7527 if (DR_GROUP_SAME_DR_STMT (stmt_info
)
7528 && (STMT_SLP_TYPE (stmt_info
)
7529 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info
))))
7531 if (dump_enabled_p ())
7532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7533 "conflicting SLP types for CSEd load\n");
7540 vect_memory_access_type memory_access_type
;
7541 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7542 &memory_access_type
, &gs_info
))
7547 if (memory_access_type
== VMAT_CONTIGUOUS
)
7549 machine_mode vec_mode
= TYPE_MODE (vectype
);
7550 if (!VECTOR_MODE_P (vec_mode
)
7551 || !can_vec_mask_load_store_p (vec_mode
,
7552 TYPE_MODE (mask_vectype
), true))
7555 else if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7557 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7559 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7560 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7562 if (dump_enabled_p ())
7563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7564 "masked gather with integer mask not"
7569 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7570 && memory_access_type
!= VMAT_GATHER_SCATTER
)
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7574 "unsupported access type for masked load.\n");
7579 if (!vec_stmt
) /* transformation not required. */
7582 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7585 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7586 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7587 memory_access_type
, &gs_info
);
7589 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7590 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7591 slp_node_instance
, slp_node
, cost_vec
);
7596 gcc_assert (memory_access_type
7597 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7599 if (dump_enabled_p ())
7600 dump_printf_loc (MSG_NOTE
, vect_location
,
7601 "transform load. ncopies = %d\n", ncopies
);
7605 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7606 ensure_base_align (dr_info
);
7608 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7610 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
7614 if (memory_access_type
== VMAT_INVARIANT
)
7616 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
7617 /* If we have versioned for aliasing or the loop doesn't
7618 have any data dependencies that would preclude this,
7619 then we are sure this is a loop invariant load and
7620 thus we can insert it on the preheader edge. */
7621 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7622 && !nested_in_vect_loop
7623 && hoist_defs_of_uses (stmt_info
, loop
));
7626 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
7627 if (dump_enabled_p ())
7628 dump_printf_loc (MSG_NOTE
, vect_location
,
7629 "hoisting out of the vectorized loop: %G", stmt
);
7630 scalar_dest
= copy_ssa_name (scalar_dest
);
7631 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
7632 gsi_insert_on_edge_immediate
7633 (loop_preheader_edge (loop
),
7634 gimple_build_assign (scalar_dest
, rhs
));
7636 /* These copies are all equivalent, but currently the representation
7637 requires a separate STMT_VINFO_VEC_STMT for each one. */
7638 prev_stmt_info
= NULL
;
7639 gimple_stmt_iterator gsi2
= *gsi
;
7641 for (j
= 0; j
< ncopies
; j
++)
7643 stmt_vec_info new_stmt_info
;
7646 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
7648 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7649 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
7653 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
7655 new_stmt_info
= vinfo
->lookup_def (new_temp
);
7658 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
7660 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7662 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7663 prev_stmt_info
= new_stmt_info
;
7668 if (memory_access_type
== VMAT_ELEMENTWISE
7669 || memory_access_type
== VMAT_STRIDED_SLP
)
7671 gimple_stmt_iterator incr_gsi
;
7677 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7678 tree stride_base
, stride_step
, alias_off
;
7679 /* Checked by get_load_store_type. */
7680 unsigned int const_nunits
= nunits
.to_constant ();
7681 unsigned HOST_WIDE_INT cst_offset
= 0;
7683 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7684 gcc_assert (!nested_in_vect_loop
);
7688 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7689 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7693 first_stmt_info
= stmt_info
;
7694 first_dr_info
= dr_info
;
7696 if (slp
&& grouped_load
)
7698 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7699 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7705 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
7706 * vect_get_place_in_interleaving_chain (stmt_info
,
7709 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7713 = fold_build_pointer_plus
7714 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7715 size_binop (PLUS_EXPR
,
7716 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
7717 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7718 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7720 /* For a load with loop-invariant (but other than power-of-2)
7721 stride (i.e. not a grouped access) like so:
7723 for (i = 0; i < n; i += stride)
7726 we generate a new induction variable and new accesses to
7727 form a new vector (or vectors, depending on ncopies):
7729 for (j = 0; ; j += VF*stride)
7731 tmp2 = array[j + stride];
7733 vectemp = {tmp1, tmp2, ...}
7736 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7737 build_int_cst (TREE_TYPE (stride_step
), vf
));
7739 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7741 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7742 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7743 create_iv (stride_base
, ivstep
, NULL
,
7744 loop
, &incr_gsi
, insert_after
,
7746 incr
= gsi_stmt (incr_gsi
);
7747 loop_vinfo
->add_stmt (incr
);
7749 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7751 prev_stmt_info
= NULL
;
7752 running_off
= offvar
;
7753 alias_off
= build_int_cst (ref_type
, 0);
7754 int nloads
= const_nunits
;
7756 tree ltype
= TREE_TYPE (vectype
);
7757 tree lvectype
= vectype
;
7758 auto_vec
<tree
> dr_chain
;
7759 if (memory_access_type
== VMAT_STRIDED_SLP
)
7761 if (group_size
< const_nunits
)
7763 /* First check if vec_init optab supports construction from
7764 vector elts directly. */
7765 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7767 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7768 && VECTOR_MODE_P (vmode
)
7769 && targetm
.vector_mode_supported_p (vmode
)
7770 && (convert_optab_handler (vec_init_optab
,
7771 TYPE_MODE (vectype
), vmode
)
7772 != CODE_FOR_nothing
))
7774 nloads
= const_nunits
/ group_size
;
7776 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7780 /* Otherwise avoid emitting a constructor of vector elements
7781 by performing the loads using an integer type of the same
7782 size, constructing a vector of those and then
7783 re-interpreting it as the original vector type.
7784 This avoids a huge runtime penalty due to the general
7785 inability to perform store forwarding from smaller stores
7786 to a larger load. */
7788 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7789 elmode
= int_mode_for_size (lsize
, 0).require ();
7790 unsigned int lnunits
= const_nunits
/ group_size
;
7791 /* If we can't construct such a vector fall back to
7792 element loads of the original vector type. */
7793 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7794 && VECTOR_MODE_P (vmode
)
7795 && targetm
.vector_mode_supported_p (vmode
)
7796 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7797 != CODE_FOR_nothing
))
7801 ltype
= build_nonstandard_integer_type (lsize
, 1);
7802 lvectype
= build_vector_type (ltype
, nloads
);
7809 lnel
= const_nunits
;
7812 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7814 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7815 else if (nloads
== 1)
7820 /* For SLP permutation support we need to load the whole group,
7821 not only the number of vector stmts the permutation result
7825 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7827 unsigned int const_vf
= vf
.to_constant ();
7828 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7829 dr_chain
.create (ncopies
);
7832 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7834 unsigned int group_el
= 0;
7835 unsigned HOST_WIDE_INT
7836 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7837 for (j
= 0; j
< ncopies
; j
++)
7840 vec_alloc (v
, nloads
);
7841 stmt_vec_info new_stmt_info
= NULL
;
7842 for (i
= 0; i
< nloads
; i
++)
7844 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7845 group_el
* elsz
+ cst_offset
);
7846 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
7847 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
7849 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
7851 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7853 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7854 gimple_assign_lhs (new_stmt
));
7858 || group_el
== group_size
)
7860 tree newoff
= copy_ssa_name (running_off
);
7861 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7862 running_off
, stride_step
);
7863 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7865 running_off
= newoff
;
7871 tree vec_inv
= build_constructor (lvectype
, v
);
7872 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
7873 new_stmt_info
= vinfo
->lookup_def (new_temp
);
7874 if (lvectype
!= vectype
)
7877 = gimple_build_assign (make_ssa_name (vectype
),
7879 build1 (VIEW_CONVERT_EXPR
,
7880 vectype
, new_temp
));
7882 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7889 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
7891 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
7896 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7898 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7899 prev_stmt_info
= new_stmt_info
;
7905 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7906 slp_node_instance
, false, &n_perms
);
7911 if (memory_access_type
== VMAT_GATHER_SCATTER
7912 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
7913 grouped_load
= false;
7917 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7918 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7919 /* For SLP vectorization we directly vectorize a subchain
7920 without permutation. */
7921 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7922 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7923 /* For BB vectorization always use the first stmt to base
7924 the data ref pointer on. */
7926 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7928 /* Check if the chain of loads is already vectorized. */
7929 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
7930 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7931 ??? But we can only do so if there is exactly one
7932 as we have no way to get at the rest. Leave the CSE
7934 ??? With the group load eventually participating
7935 in multiple different permutations (having multiple
7936 slp nodes which refer to the same group) the CSE
7937 is even wrong code. See PR56270. */
7940 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7943 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7946 /* VEC_NUM is the number of vect stmts to be created for this group. */
7949 grouped_load
= false;
7950 /* If an SLP permutation is from N elements to N elements,
7951 and if one vector holds a whole number of N, we can load
7952 the inputs to the permutation in the same way as an
7953 unpermuted sequence. In other cases we need to load the
7954 whole group, not only the number of vector stmts the
7955 permutation result fits in. */
7957 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
7958 || !multiple_p (nunits
, group_size
)))
7960 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
7961 variable VF; see vect_transform_slp_perm_load. */
7962 unsigned int const_vf
= vf
.to_constant ();
7963 unsigned int const_nunits
= nunits
.to_constant ();
7964 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7965 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7969 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7971 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7975 vec_num
= group_size
;
7977 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7981 first_stmt_info
= stmt_info
;
7982 first_dr_info
= dr_info
;
7983 group_size
= vec_num
= 1;
7985 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7988 alignment_support_scheme
7989 = vect_supportable_dr_alignment (first_dr_info
, false);
7990 gcc_assert (alignment_support_scheme
);
7991 vec_loop_masks
*loop_masks
7992 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7993 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7995 /* Targets with store-lane instructions must not require explicit
7996 realignment. vect_supportable_dr_alignment always returns either
7997 dr_aligned or dr_unaligned_supported for masked operations. */
7998 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8001 || alignment_support_scheme
== dr_aligned
8002 || alignment_support_scheme
== dr_unaligned_supported
);
8004 /* In case the vectorization factor (VF) is bigger than the number
8005 of elements that we can fit in a vectype (nunits), we have to generate
8006 more than one vector stmt - i.e - we need to "unroll" the
8007 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8008 from one copy of the vector stmt to the next, in the field
8009 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8010 stages to find the correct vector defs to be used when vectorizing
8011 stmts that use the defs of the current stmt. The example below
8012 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8013 need to create 4 vectorized stmts):
8015 before vectorization:
8016 RELATED_STMT VEC_STMT
8020 step 1: vectorize stmt S1:
8021 We first create the vector stmt VS1_0, and, as usual, record a
8022 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8023 Next, we create the vector stmt VS1_1, and record a pointer to
8024 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8025 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8027 RELATED_STMT VEC_STMT
8028 VS1_0: vx0 = memref0 VS1_1 -
8029 VS1_1: vx1 = memref1 VS1_2 -
8030 VS1_2: vx2 = memref2 VS1_3 -
8031 VS1_3: vx3 = memref3 - -
8032 S1: x = load - VS1_0
8035 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8036 information we recorded in RELATED_STMT field is used to vectorize
8039 /* In case of interleaving (non-unit grouped access):
8046 Vectorized loads are created in the order of memory accesses
8047 starting from the access of the first stmt of the chain:
8050 VS2: vx1 = &base + vec_size*1
8051 VS3: vx3 = &base + vec_size*2
8052 VS4: vx4 = &base + vec_size*3
8054 Then permutation statements are generated:
8056 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8057 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8060 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8061 (the order of the data-refs in the output of vect_permute_load_chain
8062 corresponds to the order of scalar stmts in the interleaving chain - see
8063 the documentation of vect_permute_load_chain()).
8064 The generation of permutation stmts and recording them in
8065 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8067 In case of both multiple types and interleaving, the vector loads and
8068 permutation stmts above are created for every copy. The result vector
8069 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8070 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8072 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8073 on a target that supports unaligned accesses (dr_unaligned_supported)
8074 we generate the following code:
8078 p = p + indx * vectype_size;
8083 Otherwise, the data reference is potentially unaligned on a target that
8084 does not support unaligned accesses (dr_explicit_realign_optimized) -
8085 then generate the following code, in which the data in each iteration is
8086 obtained by two vector loads, one from the previous iteration, and one
8087 from the current iteration:
8089 msq_init = *(floor(p1))
8090 p2 = initial_addr + VS - 1;
8091 realignment_token = call target_builtin;
8094 p2 = p2 + indx * vectype_size
8096 vec_dest = realign_load (msq, lsq, realignment_token)
8101 /* If the misalignment remains the same throughout the execution of the
8102 loop, we can create the init_addr and permutation mask at the loop
8103 preheader. Otherwise, it needs to be created inside the loop.
8104 This can only occur when vectorizing memory accesses in the inner-loop
8105 nested within an outer-loop that is being vectorized. */
8107 if (nested_in_vect_loop
8108 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
8109 GET_MODE_SIZE (TYPE_MODE (vectype
))))
8111 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
8112 compute_in_loop
= true;
8115 if ((alignment_support_scheme
== dr_explicit_realign_optimized
8116 || alignment_support_scheme
== dr_explicit_realign
)
8117 && !compute_in_loop
)
8119 msq
= vect_setup_realignment (first_stmt_info
, gsi
, &realignment_token
,
8120 alignment_support_scheme
, NULL_TREE
,
8122 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8124 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
8125 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
8132 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8133 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8136 tree vec_offset
= NULL_TREE
;
8137 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8139 aggr_type
= NULL_TREE
;
8142 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8144 aggr_type
= elem_type
;
8145 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8146 &bump
, &vec_offset
);
8150 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8151 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8153 aggr_type
= vectype
;
8154 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
8155 memory_access_type
);
8158 tree vec_mask
= NULL_TREE
;
8159 prev_stmt_info
= NULL
;
8160 poly_uint64 group_elt
= 0;
8161 for (j
= 0; j
< ncopies
; j
++)
8163 stmt_vec_info new_stmt_info
= NULL
;
8164 /* 1. Create the vector or array pointer update chain. */
8167 bool simd_lane_access_p
8168 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
8169 if (simd_lane_access_p
8170 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8171 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8172 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
8173 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8174 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8175 get_alias_set (TREE_TYPE (ref_type
)))
8176 && (alignment_support_scheme
== dr_aligned
8177 || alignment_support_scheme
== dr_unaligned_supported
))
8179 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8180 dataref_offset
= build_int_cst (ref_type
, 0);
8182 else if (first_stmt_info_for_drptr
8183 && first_stmt_info
!= first_stmt_info_for_drptr
)
8186 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
8187 aggr_type
, at_loop
, offset
, &dummy
,
8188 gsi
, &ptr_incr
, simd_lane_access_p
,
8190 /* Adjust the pointer by the difference to first_stmt. */
8191 data_reference_p ptrdr
8192 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
8194 = fold_convert (sizetype
,
8195 size_binop (MINUS_EXPR
,
8196 DR_INIT (first_dr_info
->dr
),
8198 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8201 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8202 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8203 &dataref_ptr
, &vec_offset
);
8206 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
8207 offset
, &dummy
, gsi
, &ptr_incr
,
8211 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
8217 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
8219 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8220 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8222 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8225 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8228 if (grouped_load
|| slp_perm
)
8229 dr_chain
.create (vec_num
);
8231 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8235 vec_array
= create_vector_array (vectype
, vec_num
);
8237 tree final_mask
= NULL_TREE
;
8239 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8242 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8249 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8251 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8252 tree alias_ptr
= build_int_cst (ref_type
, align
);
8253 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
8254 dataref_ptr
, alias_ptr
,
8260 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8261 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8262 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
8264 gimple_call_set_lhs (call
, vec_array
);
8265 gimple_call_set_nothrow (call
, true);
8266 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8268 /* Extract each vector into an SSA_NAME. */
8269 for (i
= 0; i
< vec_num
; i
++)
8271 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
8273 dr_chain
.quick_push (new_temp
);
8276 /* Record the mapping between SSA_NAMEs and statements. */
8277 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
8279 /* Record that VEC_ARRAY is now dead. */
8280 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8284 for (i
= 0; i
< vec_num
; i
++)
8286 tree final_mask
= NULL_TREE
;
8288 && memory_access_type
!= VMAT_INVARIANT
)
8289 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8291 vectype
, vec_num
* j
+ i
);
8293 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8297 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8300 /* 2. Create the vector-load in the loop. */
8301 gimple
*new_stmt
= NULL
;
8302 switch (alignment_support_scheme
)
8305 case dr_unaligned_supported
:
8307 unsigned int align
, misalign
;
8309 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8311 tree scale
= size_int (gs_info
.scale
);
8314 call
= gimple_build_call_internal
8315 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
8316 vec_offset
, scale
, final_mask
);
8318 call
= gimple_build_call_internal
8319 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
8321 gimple_call_set_nothrow (call
, true);
8323 data_ref
= NULL_TREE
;
8327 align
= DR_TARGET_ALIGNMENT (dr_info
);
8328 if (alignment_support_scheme
== dr_aligned
)
8330 gcc_assert (aligned_access_p (first_dr_info
));
8333 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8335 align
= dr_alignment
8336 (vect_dr_behavior (first_dr_info
));
8340 misalign
= DR_MISALIGNMENT (first_dr_info
);
8341 if (dataref_offset
== NULL_TREE
8342 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8343 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
8348 align
= least_bit_hwi (misalign
| align
);
8349 tree ptr
= build_int_cst (ref_type
, align
);
8351 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
8354 gimple_call_set_nothrow (call
, true);
8356 data_ref
= NULL_TREE
;
8361 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
8364 : build_int_cst (ref_type
, 0));
8365 if (alignment_support_scheme
== dr_aligned
)
8367 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8368 TREE_TYPE (data_ref
)
8369 = build_aligned_type (TREE_TYPE (data_ref
),
8370 align
* BITS_PER_UNIT
);
8372 TREE_TYPE (data_ref
)
8373 = build_aligned_type (TREE_TYPE (data_ref
),
8374 TYPE_ALIGN (elem_type
));
8378 case dr_explicit_realign
:
8382 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8384 if (compute_in_loop
)
8385 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
8387 dr_explicit_realign
,
8390 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8391 ptr
= copy_ssa_name (dataref_ptr
);
8393 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8394 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr_info
);
8395 new_stmt
= gimple_build_assign
8396 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
8398 (TREE_TYPE (dataref_ptr
),
8399 -(HOST_WIDE_INT
) align
));
8400 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8402 = build2 (MEM_REF
, vectype
, ptr
,
8403 build_int_cst (ref_type
, 0));
8404 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8405 vec_dest
= vect_create_destination_var (scalar_dest
,
8407 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8408 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8409 gimple_assign_set_lhs (new_stmt
, new_temp
);
8410 gimple_set_vdef (new_stmt
, gimple_vdef (stmt_info
->stmt
));
8411 gimple_set_vuse (new_stmt
, gimple_vuse (stmt_info
->stmt
));
8412 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8415 bump
= size_binop (MULT_EXPR
, vs
,
8416 TYPE_SIZE_UNIT (elem_type
));
8417 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
8418 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
8420 new_stmt
= gimple_build_assign
8421 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
8423 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
8424 ptr
= copy_ssa_name (ptr
, new_stmt
);
8425 gimple_assign_set_lhs (new_stmt
, ptr
);
8426 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8428 = build2 (MEM_REF
, vectype
, ptr
,
8429 build_int_cst (ref_type
, 0));
8432 case dr_explicit_realign_optimized
:
8434 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8435 new_temp
= copy_ssa_name (dataref_ptr
);
8437 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8438 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr_info
);
8439 new_stmt
= gimple_build_assign
8440 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
8441 build_int_cst (TREE_TYPE (dataref_ptr
),
8442 -(HOST_WIDE_INT
) align
));
8443 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8445 = build2 (MEM_REF
, vectype
, new_temp
,
8446 build_int_cst (ref_type
, 0));
8452 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8453 /* DATA_REF is null if we've already built the statement. */
8456 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8457 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8459 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8460 gimple_set_lhs (new_stmt
, new_temp
);
8462 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8464 /* 3. Handle explicit realignment if necessary/supported.
8466 vec_dest = realign_load (msq, lsq, realignment_token) */
8467 if (alignment_support_scheme
== dr_explicit_realign_optimized
8468 || alignment_support_scheme
== dr_explicit_realign
)
8470 lsq
= gimple_assign_lhs (new_stmt
);
8471 if (!realignment_token
)
8472 realignment_token
= dataref_ptr
;
8473 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8474 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
8475 msq
, lsq
, realignment_token
);
8476 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8477 gimple_assign_set_lhs (new_stmt
, new_temp
);
8479 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8481 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8484 if (i
== vec_num
- 1 && j
== ncopies
- 1)
8485 add_phi_arg (phi
, lsq
,
8486 loop_latch_edge (containing_loop
),
8492 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8494 tree perm_mask
= perm_mask_for_reverse (vectype
);
8495 new_temp
= permute_vec_elements (new_temp
, new_temp
,
8496 perm_mask
, stmt_info
, gsi
);
8497 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8500 /* Collect vector loads and later create their permutation in
8501 vect_transform_grouped_load (). */
8502 if (grouped_load
|| slp_perm
)
8503 dr_chain
.quick_push (new_temp
);
8505 /* Store vector loads in the corresponding SLP_NODE. */
8506 if (slp
&& !slp_perm
)
8507 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8509 /* With SLP permutation we load the gaps as well, without
8510 we need to skip the gaps after we manage to fully load
8511 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8512 group_elt
+= nunits
;
8513 if (maybe_ne (group_gap_adj
, 0U)
8515 && known_eq (group_elt
, group_size
- group_gap_adj
))
8517 poly_wide_int bump_val
8518 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8520 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8521 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8526 /* Bump the vector pointer to account for a gap or for excess
8527 elements loaded for a permuted SLP load. */
8528 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
8530 poly_wide_int bump_val
8531 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8533 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8534 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8539 if (slp
&& !slp_perm
)
8545 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8546 slp_node_instance
, false,
8549 dr_chain
.release ();
8557 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
8558 vect_transform_grouped_load (stmt_info
, dr_chain
,
8560 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8565 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8567 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8568 prev_stmt_info
= new_stmt_info
;
8571 dr_chain
.release ();
8577 /* Function vect_is_simple_cond.
8580 LOOP - the loop that is being vectorized.
8581 COND - Condition that is checked for simple use.
8584 *COMP_VECTYPE - the vector type for the comparison.
8585 *DTS - The def types for the arguments of the comparison
8587 Returns whether a COND can be vectorized. Checks whether
8588 condition operands are supportable using vec_is_simple_use. */
8591 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8592 tree
*comp_vectype
, enum vect_def_type
*dts
,
8596 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8599 if (TREE_CODE (cond
) == SSA_NAME
8600 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8602 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
8604 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8609 if (!COMPARISON_CLASS_P (cond
))
8612 lhs
= TREE_OPERAND (cond
, 0);
8613 rhs
= TREE_OPERAND (cond
, 1);
8615 if (TREE_CODE (lhs
) == SSA_NAME
)
8617 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
8620 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8621 || TREE_CODE (lhs
) == FIXED_CST
)
8622 dts
[0] = vect_constant_def
;
8626 if (TREE_CODE (rhs
) == SSA_NAME
)
8628 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
8631 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8632 || TREE_CODE (rhs
) == FIXED_CST
)
8633 dts
[1] = vect_constant_def
;
8637 if (vectype1
&& vectype2
8638 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8639 TYPE_VECTOR_SUBPARTS (vectype2
)))
8642 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8643 /* Invariant comparison. */
8644 if (! *comp_vectype
&& vectype
)
8646 tree scalar_type
= TREE_TYPE (lhs
);
8647 /* If we can widen the comparison to match vectype do so. */
8648 if (INTEGRAL_TYPE_P (scalar_type
)
8649 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8650 TYPE_SIZE (TREE_TYPE (vectype
))))
8651 scalar_type
= build_nonstandard_integer_type
8652 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8653 TYPE_UNSIGNED (scalar_type
));
8654 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
8660 /* vectorizable_condition.
8662 Check if STMT_INFO is conditional modify expression that can be vectorized.
8663 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8664 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8667 When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
8668 variable to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1,
8669 and in else clause if it is 2).
8671 Return true if STMT_INFO is vectorizable in this way. */
8674 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8675 stmt_vec_info
*vec_stmt
, tree reduc_def
,
8676 int reduc_index
, slp_tree slp_node
,
8677 stmt_vector_for_cost
*cost_vec
)
8679 vec_info
*vinfo
= stmt_info
->vinfo
;
8680 tree scalar_dest
= NULL_TREE
;
8681 tree vec_dest
= NULL_TREE
;
8682 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8683 tree then_clause
, else_clause
;
8684 tree comp_vectype
= NULL_TREE
;
8685 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8686 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8689 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8690 enum vect_def_type dts
[4]
8691 = {vect_unknown_def_type
, vect_unknown_def_type
,
8692 vect_unknown_def_type
, vect_unknown_def_type
};
8695 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8696 stmt_vec_info prev_stmt_info
= NULL
;
8698 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8699 vec
<tree
> vec_oprnds0
= vNULL
;
8700 vec
<tree
> vec_oprnds1
= vNULL
;
8701 vec
<tree
> vec_oprnds2
= vNULL
;
8702 vec
<tree
> vec_oprnds3
= vNULL
;
8704 bool masked
= false;
8706 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
8709 vect_reduction_type reduction_type
8710 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
8711 if (reduction_type
== TREE_CODE_REDUCTION
)
8713 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8716 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8717 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8721 /* FORNOW: not yet supported. */
8722 if (STMT_VINFO_LIVE_P (stmt_info
))
8724 if (dump_enabled_p ())
8725 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8726 "value used after loop.\n");
8731 /* Is vectorizable conditional operation? */
8732 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
8736 code
= gimple_assign_rhs_code (stmt
);
8738 if (code
!= COND_EXPR
)
8741 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8742 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8747 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8749 gcc_assert (ncopies
>= 1);
8750 if (reduc_index
&& ncopies
> 1)
8751 return false; /* FORNOW */
8753 cond_expr
= gimple_assign_rhs1 (stmt
);
8754 then_clause
= gimple_assign_rhs2 (stmt
);
8755 else_clause
= gimple_assign_rhs3 (stmt
);
8757 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8758 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
8762 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
8764 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
8767 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8770 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8773 masked
= !COMPARISON_CLASS_P (cond_expr
);
8774 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8776 if (vec_cmp_type
== NULL_TREE
)
8779 cond_code
= TREE_CODE (cond_expr
);
8782 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8783 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8786 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8788 /* Boolean values may have another representation in vectors
8789 and therefore we prefer bit operations over comparison for
8790 them (which also works for scalar masks). We store opcodes
8791 to use in bitop1 and bitop2. Statement is vectorized as
8792 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8793 depending on bitop1 and bitop2 arity. */
8797 bitop1
= BIT_NOT_EXPR
;
8798 bitop2
= BIT_AND_EXPR
;
8801 bitop1
= BIT_NOT_EXPR
;
8802 bitop2
= BIT_IOR_EXPR
;
8805 bitop1
= BIT_NOT_EXPR
;
8806 bitop2
= BIT_AND_EXPR
;
8807 std::swap (cond_expr0
, cond_expr1
);
8810 bitop1
= BIT_NOT_EXPR
;
8811 bitop2
= BIT_IOR_EXPR
;
8812 std::swap (cond_expr0
, cond_expr1
);
8815 bitop1
= BIT_XOR_EXPR
;
8818 bitop1
= BIT_XOR_EXPR
;
8819 bitop2
= BIT_NOT_EXPR
;
8824 cond_code
= SSA_NAME
;
8829 if (bitop1
!= NOP_EXPR
)
8831 machine_mode mode
= TYPE_MODE (comp_vectype
);
8834 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8835 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8838 if (bitop2
!= NOP_EXPR
)
8840 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8842 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8846 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8849 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8850 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
8861 vec_oprnds0
.create (1);
8862 vec_oprnds1
.create (1);
8863 vec_oprnds2
.create (1);
8864 vec_oprnds3
.create (1);
8868 scalar_dest
= gimple_assign_lhs (stmt
);
8869 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
8870 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8872 /* Handle cond expr. */
8873 for (j
= 0; j
< ncopies
; j
++)
8875 stmt_vec_info new_stmt_info
= NULL
;
8880 auto_vec
<tree
, 4> ops
;
8881 auto_vec
<vec
<tree
>, 4> vec_defs
;
8884 ops
.safe_push (cond_expr
);
8887 ops
.safe_push (cond_expr0
);
8888 ops
.safe_push (cond_expr1
);
8890 ops
.safe_push (then_clause
);
8891 ops
.safe_push (else_clause
);
8892 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8893 vec_oprnds3
= vec_defs
.pop ();
8894 vec_oprnds2
= vec_defs
.pop ();
8896 vec_oprnds1
= vec_defs
.pop ();
8897 vec_oprnds0
= vec_defs
.pop ();
8904 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
8906 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
, &dts
[0]);
8911 = vect_get_vec_def_for_operand (cond_expr0
,
8912 stmt_info
, comp_vectype
);
8913 vect_is_simple_use (cond_expr0
, loop_vinfo
, &dts
[0]);
8916 = vect_get_vec_def_for_operand (cond_expr1
,
8917 stmt_info
, comp_vectype
);
8918 vect_is_simple_use (cond_expr1
, loop_vinfo
, &dts
[1]);
8920 if (reduc_index
== 1)
8921 vec_then_clause
= reduc_def
;
8924 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8926 vect_is_simple_use (then_clause
, loop_vinfo
, &dts
[2]);
8928 if (reduc_index
== 2)
8929 vec_else_clause
= reduc_def
;
8932 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8934 vect_is_simple_use (else_clause
, loop_vinfo
, &dts
[3]);
8941 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
8944 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
8946 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
8947 vec_oprnds2
.pop ());
8948 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
8949 vec_oprnds3
.pop ());
8954 vec_oprnds0
.quick_push (vec_cond_lhs
);
8956 vec_oprnds1
.quick_push (vec_cond_rhs
);
8957 vec_oprnds2
.quick_push (vec_then_clause
);
8958 vec_oprnds3
.quick_push (vec_else_clause
);
8961 /* Arguments are ready. Create the new vector stmt. */
8962 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8964 vec_then_clause
= vec_oprnds2
[i
];
8965 vec_else_clause
= vec_oprnds3
[i
];
8968 vec_compare
= vec_cond_lhs
;
8971 vec_cond_rhs
= vec_oprnds1
[i
];
8972 if (bitop1
== NOP_EXPR
)
8973 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8974 vec_cond_lhs
, vec_cond_rhs
);
8977 new_temp
= make_ssa_name (vec_cmp_type
);
8979 if (bitop1
== BIT_NOT_EXPR
)
8980 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8984 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8986 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8987 if (bitop2
== NOP_EXPR
)
8988 vec_compare
= new_temp
;
8989 else if (bitop2
== BIT_NOT_EXPR
)
8991 /* Instead of doing ~x ? y : z do x ? z : y. */
8992 vec_compare
= new_temp
;
8993 std::swap (vec_then_clause
, vec_else_clause
);
8997 vec_compare
= make_ssa_name (vec_cmp_type
);
8999 = gimple_build_assign (vec_compare
, bitop2
,
9000 vec_cond_lhs
, new_temp
);
9001 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9005 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9007 if (!is_gimple_val (vec_compare
))
9009 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
9010 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
9012 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9013 vec_compare
= vec_compare_name
;
9015 gcc_assert (reduc_index
== 2);
9016 gcall
*new_stmt
= gimple_build_call_internal
9017 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
9019 gimple_call_set_lhs (new_stmt
, scalar_dest
);
9020 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
9021 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
9022 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
9025 /* In this case we're moving the definition to later in the
9026 block. That doesn't matter because the only uses of the
9027 lhs are in phi statements. */
9028 gimple_stmt_iterator old_gsi
9029 = gsi_for_stmt (stmt_info
->stmt
);
9030 gsi_remove (&old_gsi
, true);
9032 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9037 new_temp
= make_ssa_name (vec_dest
);
9039 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
9040 vec_then_clause
, vec_else_clause
);
9042 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9045 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9052 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9054 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9056 prev_stmt_info
= new_stmt_info
;
9059 vec_oprnds0
.release ();
9060 vec_oprnds1
.release ();
9061 vec_oprnds2
.release ();
9062 vec_oprnds3
.release ();
9067 /* vectorizable_comparison.
9069 Check if STMT_INFO is comparison expression that can be vectorized.
9070 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9071 comparison, put it in VEC_STMT, and insert it at GSI.
9073 Return true if STMT_INFO is vectorizable in this way. */
9076 vectorizable_comparison (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9077 stmt_vec_info
*vec_stmt
, tree reduc_def
,
9078 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9080 vec_info
*vinfo
= stmt_info
->vinfo
;
9081 tree lhs
, rhs1
, rhs2
;
9082 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9083 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9084 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
9086 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9087 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
9091 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9092 stmt_vec_info prev_stmt_info
= NULL
;
9094 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9095 vec
<tree
> vec_oprnds0
= vNULL
;
9096 vec
<tree
> vec_oprnds1
= vNULL
;
9100 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9103 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
9106 mask_type
= vectype
;
9107 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9112 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9114 gcc_assert (ncopies
>= 1);
9115 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9116 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9120 if (STMT_VINFO_LIVE_P (stmt_info
))
9122 if (dump_enabled_p ())
9123 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9124 "value used after loop.\n");
9128 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9132 code
= gimple_assign_rhs_code (stmt
);
9134 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
9137 rhs1
= gimple_assign_rhs1 (stmt
);
9138 rhs2
= gimple_assign_rhs2 (stmt
);
9140 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
9143 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
9146 if (vectype1
&& vectype2
9147 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9148 TYPE_VECTOR_SUBPARTS (vectype2
)))
9151 vectype
= vectype1
? vectype1
: vectype2
;
9153 /* Invariant comparison. */
9156 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
9157 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
9160 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
9163 /* Can't compare mask and non-mask types. */
9164 if (vectype1
&& vectype2
9165 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
9168 /* Boolean values may have another representation in vectors
9169 and therefore we prefer bit operations over comparison for
9170 them (which also works for scalar masks). We store opcodes
9171 to use in bitop1 and bitop2. Statement is vectorized as
9172 BITOP2 (rhs1 BITOP1 rhs2) or
9173 rhs1 BITOP2 (BITOP1 rhs2)
9174 depending on bitop1 and bitop2 arity. */
9175 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
9177 if (code
== GT_EXPR
)
9179 bitop1
= BIT_NOT_EXPR
;
9180 bitop2
= BIT_AND_EXPR
;
9182 else if (code
== GE_EXPR
)
9184 bitop1
= BIT_NOT_EXPR
;
9185 bitop2
= BIT_IOR_EXPR
;
9187 else if (code
== LT_EXPR
)
9189 bitop1
= BIT_NOT_EXPR
;
9190 bitop2
= BIT_AND_EXPR
;
9191 std::swap (rhs1
, rhs2
);
9192 std::swap (dts
[0], dts
[1]);
9194 else if (code
== LE_EXPR
)
9196 bitop1
= BIT_NOT_EXPR
;
9197 bitop2
= BIT_IOR_EXPR
;
9198 std::swap (rhs1
, rhs2
);
9199 std::swap (dts
[0], dts
[1]);
9203 bitop1
= BIT_XOR_EXPR
;
9204 if (code
== EQ_EXPR
)
9205 bitop2
= BIT_NOT_EXPR
;
9211 if (bitop1
== NOP_EXPR
)
9213 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
9218 machine_mode mode
= TYPE_MODE (vectype
);
9221 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
9222 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9225 if (bitop2
!= NOP_EXPR
)
9227 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
9228 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9233 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
9234 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
9235 dts
, ndts
, slp_node
, cost_vec
);
9242 vec_oprnds0
.create (1);
9243 vec_oprnds1
.create (1);
9247 lhs
= gimple_assign_lhs (stmt
);
9248 mask
= vect_create_destination_var (lhs
, mask_type
);
9250 /* Handle cmp expr. */
9251 for (j
= 0; j
< ncopies
; j
++)
9253 stmt_vec_info new_stmt_info
= NULL
;
9258 auto_vec
<tree
, 2> ops
;
9259 auto_vec
<vec
<tree
>, 2> vec_defs
;
9261 ops
.safe_push (rhs1
);
9262 ops
.safe_push (rhs2
);
9263 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9264 vec_oprnds1
= vec_defs
.pop ();
9265 vec_oprnds0
= vec_defs
.pop ();
9269 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt_info
,
9271 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt_info
,
9277 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
9278 vec_oprnds0
.pop ());
9279 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
9280 vec_oprnds1
.pop ());
9285 vec_oprnds0
.quick_push (vec_rhs1
);
9286 vec_oprnds1
.quick_push (vec_rhs2
);
9289 /* Arguments are ready. Create the new vector stmt. */
9290 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
9292 vec_rhs2
= vec_oprnds1
[i
];
9294 new_temp
= make_ssa_name (mask
);
9295 if (bitop1
== NOP_EXPR
)
9297 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
9298 vec_rhs1
, vec_rhs2
);
9300 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9305 if (bitop1
== BIT_NOT_EXPR
)
9306 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
9308 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
9311 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9312 if (bitop2
!= NOP_EXPR
)
9314 tree res
= make_ssa_name (mask
);
9315 if (bitop2
== BIT_NOT_EXPR
)
9316 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
9318 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
9321 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9325 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9332 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9334 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9336 prev_stmt_info
= new_stmt_info
;
9339 vec_oprnds0
.release ();
9340 vec_oprnds1
.release ();
9345 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9346 can handle all live statements in the node. Otherwise return true
9347 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9348 GSI and VEC_STMT are as for vectorizable_live_operation. */
9351 can_vectorize_live_stmts (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9352 slp_tree slp_node
, stmt_vec_info
*vec_stmt
,
9353 stmt_vector_for_cost
*cost_vec
)
9357 stmt_vec_info slp_stmt_info
;
9359 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
9361 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
9362 && !vectorizable_live_operation (slp_stmt_info
, gsi
, slp_node
, i
,
9363 vec_stmt
, cost_vec
))
9367 else if (STMT_VINFO_LIVE_P (stmt_info
)
9368 && !vectorizable_live_operation (stmt_info
, gsi
, slp_node
, -1,
9369 vec_stmt
, cost_vec
))
9375 /* Make sure the statement is vectorizable. */
9378 vect_analyze_stmt (stmt_vec_info stmt_info
, bool *need_to_vectorize
,
9379 slp_tree node
, slp_instance node_instance
,
9380 stmt_vector_for_cost
*cost_vec
)
9382 vec_info
*vinfo
= stmt_info
->vinfo
;
9383 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9384 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
9386 gimple_seq pattern_def_seq
;
9388 if (dump_enabled_p ())
9389 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
9392 if (gimple_has_volatile_ops (stmt_info
->stmt
))
9393 return opt_result::failure_at (stmt_info
->stmt
,
9395 " stmt has volatile operands: %G\n",
9398 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9400 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
9402 gimple_stmt_iterator si
;
9404 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
9406 stmt_vec_info pattern_def_stmt_info
9407 = vinfo
->lookup_stmt (gsi_stmt (si
));
9408 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
9409 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
9411 /* Analyze def stmt of STMT if it's a pattern stmt. */
9412 if (dump_enabled_p ())
9413 dump_printf_loc (MSG_NOTE
, vect_location
,
9414 "==> examining pattern def statement: %G",
9415 pattern_def_stmt_info
->stmt
);
9418 = vect_analyze_stmt (pattern_def_stmt_info
,
9419 need_to_vectorize
, node
, node_instance
,
9427 /* Skip stmts that do not need to be vectorized. In loops this is expected
9429 - the COND_EXPR which is the loop exit condition
9430 - any LABEL_EXPRs in the loop
9431 - computations that are used only for array indexing or loop control.
9432 In basic blocks we only analyze statements that are a part of some SLP
9433 instance, therefore, all the statements are relevant.
9435 Pattern statement needs to be analyzed instead of the original statement
9436 if the original statement is not relevant. Otherwise, we analyze both
9437 statements. In basic blocks we are called from some SLP instance
9438 traversal, don't analyze pattern stmts instead, the pattern stmts
9439 already will be part of SLP instance. */
9441 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
9442 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
9443 && !STMT_VINFO_LIVE_P (stmt_info
))
9445 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9446 && pattern_stmt_info
9447 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
9448 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
9450 /* Analyze PATTERN_STMT instead of the original stmt. */
9451 stmt_info
= pattern_stmt_info
;
9452 if (dump_enabled_p ())
9453 dump_printf_loc (MSG_NOTE
, vect_location
,
9454 "==> examining pattern statement: %G",
9459 if (dump_enabled_p ())
9460 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
9462 return opt_result::success ();
9465 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9467 && pattern_stmt_info
9468 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
9469 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
9471 /* Analyze PATTERN_STMT too. */
9472 if (dump_enabled_p ())
9473 dump_printf_loc (MSG_NOTE
, vect_location
,
9474 "==> examining pattern statement: %G",
9475 pattern_stmt_info
->stmt
);
9478 = vect_analyze_stmt (pattern_stmt_info
, need_to_vectorize
, node
,
9479 node_instance
, cost_vec
);
9484 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
9486 case vect_internal_def
:
9489 case vect_reduction_def
:
9490 case vect_nested_cycle
:
9491 gcc_assert (!bb_vinfo
9492 && (relevance
== vect_used_in_outer
9493 || relevance
== vect_used_in_outer_by_reduction
9494 || relevance
== vect_used_by_reduction
9495 || relevance
== vect_unused_in_scope
9496 || relevance
== vect_used_only_live
));
9499 case vect_induction_def
:
9500 gcc_assert (!bb_vinfo
);
9503 case vect_constant_def
:
9504 case vect_external_def
:
9505 case vect_unknown_def_type
:
9510 if (STMT_VINFO_RELEVANT_P (stmt_info
))
9512 tree type
= gimple_expr_type (stmt_info
->stmt
);
9513 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
9514 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9515 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
9516 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
9517 *need_to_vectorize
= true;
9520 if (PURE_SLP_STMT (stmt_info
) && !node
)
9522 dump_printf_loc (MSG_NOTE
, vect_location
,
9523 "handled only by SLP analysis\n");
9524 return opt_result::success ();
9529 && (STMT_VINFO_RELEVANT_P (stmt_info
)
9530 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
9531 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9532 -mveclibabi= takes preference over library functions with
9533 the simd attribute. */
9534 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9535 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
9537 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9538 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9539 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9540 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
9542 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9543 || vectorizable_reduction (stmt_info
, NULL
, NULL
, node
,
9544 node_instance
, cost_vec
)
9545 || vectorizable_induction (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9546 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9547 || vectorizable_condition (stmt_info
, NULL
, NULL
, NULL
, 0, node
,
9549 || vectorizable_comparison (stmt_info
, NULL
, NULL
, NULL
, node
,
9554 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9555 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
9557 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
,
9559 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9560 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9561 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
,
9563 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
9565 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9566 || vectorizable_condition (stmt_info
, NULL
, NULL
, NULL
, 0, node
,
9568 || vectorizable_comparison (stmt_info
, NULL
, NULL
, NULL
, node
,
9573 return opt_result::failure_at (stmt_info
->stmt
,
9575 " relevant stmt not supported: %G",
9578 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9579 need extra handling, except for vectorizable reductions. */
9581 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9582 && !can_vectorize_live_stmts (stmt_info
, NULL
, node
, NULL
, cost_vec
))
9583 return opt_result::failure_at (stmt_info
->stmt
,
9585 " live stmt not supported: %G",
9588 return opt_result::success ();
9592 /* Function vect_transform_stmt.
9594 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9597 vect_transform_stmt (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9598 slp_tree slp_node
, slp_instance slp_node_instance
)
9600 vec_info
*vinfo
= stmt_info
->vinfo
;
9601 bool is_store
= false;
9602 stmt_vec_info vec_stmt
= NULL
;
9605 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9606 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
9608 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
9609 && nested_in_vect_loop_p
9610 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
9613 gimple
*stmt
= stmt_info
->stmt
;
9614 switch (STMT_VINFO_TYPE (stmt_info
))
9616 case type_demotion_vec_info_type
:
9617 case type_promotion_vec_info_type
:
9618 case type_conversion_vec_info_type
:
9619 done
= vectorizable_conversion (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9624 case induc_vec_info_type
:
9625 done
= vectorizable_induction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9630 case shift_vec_info_type
:
9631 done
= vectorizable_shift (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9635 case op_vec_info_type
:
9636 done
= vectorizable_operation (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9641 case assignment_vec_info_type
:
9642 done
= vectorizable_assignment (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9647 case load_vec_info_type
:
9648 done
= vectorizable_load (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9649 slp_node_instance
, NULL
);
9653 case store_vec_info_type
:
9654 done
= vectorizable_store (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9656 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
9658 /* In case of interleaving, the whole chain is vectorized when the
9659 last store in the chain is reached. Store stmts before the last
9660 one are skipped, and there vec_stmt_info shouldn't be freed
9662 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9663 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
9670 case condition_vec_info_type
:
9671 done
= vectorizable_condition (stmt_info
, gsi
, &vec_stmt
, NULL
, 0,
9676 case comparison_vec_info_type
:
9677 done
= vectorizable_comparison (stmt_info
, gsi
, &vec_stmt
, NULL
,
9682 case call_vec_info_type
:
9683 done
= vectorizable_call (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9684 stmt
= gsi_stmt (*gsi
);
9687 case call_simd_clone_vec_info_type
:
9688 done
= vectorizable_simd_clone_call (stmt_info
, gsi
, &vec_stmt
,
9690 stmt
= gsi_stmt (*gsi
);
9693 case reduc_vec_info_type
:
9694 done
= vectorizable_reduction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9695 slp_node_instance
, NULL
);
9700 if (!STMT_VINFO_LIVE_P (stmt_info
))
9702 if (dump_enabled_p ())
9703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9704 "stmt not supported.\n");
9709 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9710 This would break hybrid SLP vectorization. */
9712 gcc_assert (!vec_stmt
9713 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
9715 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9716 is being vectorized, but outside the immediately enclosing loop. */
9719 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9720 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9721 || STMT_VINFO_RELEVANT (stmt_info
) ==
9722 vect_used_in_outer_by_reduction
))
9724 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9725 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9726 imm_use_iterator imm_iter
;
9727 use_operand_p use_p
;
9730 if (dump_enabled_p ())
9731 dump_printf_loc (MSG_NOTE
, vect_location
,
9732 "Record the vdef for outer-loop vectorization.\n");
9734 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9735 (to be used when vectorizing outer-loop stmts that use the DEF of
9737 if (gimple_code (stmt
) == GIMPLE_PHI
)
9738 scalar_dest
= PHI_RESULT (stmt
);
9740 scalar_dest
= gimple_get_lhs (stmt
);
9742 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9743 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9745 stmt_vec_info exit_phi_info
9746 = vinfo
->lookup_stmt (USE_STMT (use_p
));
9747 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
9751 /* Handle stmts whose DEF is used outside the loop-nest that is
9752 being vectorized. */
9753 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9755 done
= can_vectorize_live_stmts (stmt_info
, gsi
, slp_node
, &vec_stmt
,
9761 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
9767 /* Remove a group of stores (for SLP or interleaving), free their
9771 vect_remove_stores (stmt_vec_info first_stmt_info
)
9773 vec_info
*vinfo
= first_stmt_info
->vinfo
;
9774 stmt_vec_info next_stmt_info
= first_stmt_info
;
9776 while (next_stmt_info
)
9778 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9779 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
9780 /* Free the attached stmt_vec_info and remove the stmt. */
9781 vinfo
->remove_stmt (next_stmt_info
);
9782 next_stmt_info
= tmp
;
9786 /* Function get_vectype_for_scalar_type_and_size.
9788 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9792 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9794 tree orig_scalar_type
= scalar_type
;
9795 scalar_mode inner_mode
;
9796 machine_mode simd_mode
;
9800 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9801 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9804 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9806 /* For vector types of elements whose mode precision doesn't
9807 match their types precision we use a element type of mode
9808 precision. The vectorization routines will have to make sure
9809 they support the proper result truncation/extension.
9810 We also make sure to build vector types with INTEGER_TYPE
9811 component type only. */
9812 if (INTEGRAL_TYPE_P (scalar_type
)
9813 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9814 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9815 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9816 TYPE_UNSIGNED (scalar_type
));
9818 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9819 When the component mode passes the above test simply use a type
9820 corresponding to that mode. The theory is that any use that
9821 would cause problems with this will disable vectorization anyway. */
9822 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9823 && !INTEGRAL_TYPE_P (scalar_type
))
9824 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9826 /* We can't build a vector type of elements with alignment bigger than
9828 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9829 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9830 TYPE_UNSIGNED (scalar_type
));
9832 /* If we felt back to using the mode fail if there was
9833 no scalar type for it. */
9834 if (scalar_type
== NULL_TREE
)
9837 /* If no size was supplied use the mode the target prefers. Otherwise
9838 lookup a vector mode of the specified size. */
9839 if (known_eq (size
, 0U))
9840 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9841 else if (!multiple_p (size
, nbytes
, &nunits
)
9842 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9844 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9845 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9848 vectype
= build_vector_type (scalar_type
, nunits
);
9850 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9851 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9854 /* Re-attach the address-space qualifier if we canonicalized the scalar
9856 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9857 return build_qualified_type
9858 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9863 poly_uint64 current_vector_size
;
9865 /* Function get_vectype_for_scalar_type.
9867 Returns the vector type corresponding to SCALAR_TYPE as supported
9871 get_vectype_for_scalar_type (tree scalar_type
)
9874 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9875 current_vector_size
);
9877 && known_eq (current_vector_size
, 0U))
9878 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9882 /* Function get_mask_type_for_scalar_type.
9884 Returns the mask type corresponding to a result of comparison
9885 of vectors of specified SCALAR_TYPE as supported by target. */
9888 get_mask_type_for_scalar_type (tree scalar_type
)
9890 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9895 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9896 current_vector_size
);
9899 /* Function get_same_sized_vectype
9901 Returns a vector type corresponding to SCALAR_TYPE of size
9902 VECTOR_TYPE if supported by the target. */
9905 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9907 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9908 return build_same_sized_truth_vector_type (vector_type
);
9910 return get_vectype_for_scalar_type_and_size
9911 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9914 /* Function vect_is_simple_use.
9917 VINFO - the vect info of the loop or basic block that is being vectorized.
9918 OPERAND - operand in the loop or bb.
9920 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9921 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9922 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9923 the definition could be anywhere in the function
9924 DT - the type of definition
9926 Returns whether a stmt with OPERAND can be vectorized.
9927 For loops, supportable operands are constants, loop invariants, and operands
9928 that are defined by the current iteration of the loop. Unsupportable
9929 operands are those that are defined by a previous iteration of the loop (as
9930 is the case in reduction/induction computations).
9931 For basic blocks, supportable operands are constants and bb invariants.
9932 For now, operands defined outside the basic block are not supported. */
9935 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
9936 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
9938 if (def_stmt_info_out
)
9939 *def_stmt_info_out
= NULL
;
9941 *def_stmt_out
= NULL
;
9942 *dt
= vect_unknown_def_type
;
9944 if (dump_enabled_p ())
9946 dump_printf_loc (MSG_NOTE
, vect_location
,
9947 "vect_is_simple_use: operand ");
9948 if (TREE_CODE (operand
) == SSA_NAME
9949 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
9950 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
9952 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9955 if (CONSTANT_CLASS_P (operand
))
9956 *dt
= vect_constant_def
;
9957 else if (is_gimple_min_invariant (operand
))
9958 *dt
= vect_external_def
;
9959 else if (TREE_CODE (operand
) != SSA_NAME
)
9960 *dt
= vect_unknown_def_type
;
9961 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9962 *dt
= vect_external_def
;
9965 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
9966 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
9968 *dt
= vect_external_def
;
9971 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
9972 def_stmt
= stmt_vinfo
->stmt
;
9973 switch (gimple_code (def_stmt
))
9978 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9981 *dt
= vect_unknown_def_type
;
9984 if (def_stmt_info_out
)
9985 *def_stmt_info_out
= stmt_vinfo
;
9988 *def_stmt_out
= def_stmt
;
9991 if (dump_enabled_p ())
9993 dump_printf (MSG_NOTE
, ", type of def: ");
9996 case vect_uninitialized_def
:
9997 dump_printf (MSG_NOTE
, "uninitialized\n");
9999 case vect_constant_def
:
10000 dump_printf (MSG_NOTE
, "constant\n");
10002 case vect_external_def
:
10003 dump_printf (MSG_NOTE
, "external\n");
10005 case vect_internal_def
:
10006 dump_printf (MSG_NOTE
, "internal\n");
10008 case vect_induction_def
:
10009 dump_printf (MSG_NOTE
, "induction\n");
10011 case vect_reduction_def
:
10012 dump_printf (MSG_NOTE
, "reduction\n");
10014 case vect_double_reduction_def
:
10015 dump_printf (MSG_NOTE
, "double reduction\n");
10017 case vect_nested_cycle
:
10018 dump_printf (MSG_NOTE
, "nested cycle\n");
10020 case vect_unknown_def_type
:
10021 dump_printf (MSG_NOTE
, "unknown\n");
10026 if (*dt
== vect_unknown_def_type
)
10028 if (dump_enabled_p ())
10029 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10030 "Unsupported pattern.\n");
10037 /* Function vect_is_simple_use.
10039 Same as vect_is_simple_use but also determines the vector operand
10040 type of OPERAND and stores it to *VECTYPE. If the definition of
10041 OPERAND is vect_uninitialized_def, vect_constant_def or
10042 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10043 is responsible to compute the best suited vector type for the
10047 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10048 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
10049 gimple
**def_stmt_out
)
10051 stmt_vec_info def_stmt_info
;
10053 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
10057 *def_stmt_out
= def_stmt
;
10058 if (def_stmt_info_out
)
10059 *def_stmt_info_out
= def_stmt_info
;
10061 /* Now get a vector type if the def is internal, otherwise supply
10062 NULL_TREE and leave it up to the caller to figure out a proper
10063 type for the use stmt. */
10064 if (*dt
== vect_internal_def
10065 || *dt
== vect_induction_def
10066 || *dt
== vect_reduction_def
10067 || *dt
== vect_double_reduction_def
10068 || *dt
== vect_nested_cycle
)
10070 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
10071 gcc_assert (*vectype
!= NULL_TREE
);
10072 if (dump_enabled_p ())
10073 dump_printf_loc (MSG_NOTE
, vect_location
,
10074 "vect_is_simple_use: vectype %T\n", *vectype
);
10076 else if (*dt
== vect_uninitialized_def
10077 || *dt
== vect_constant_def
10078 || *dt
== vect_external_def
)
10079 *vectype
= NULL_TREE
;
10081 gcc_unreachable ();
10087 /* Function supportable_widening_operation
10089 Check whether an operation represented by the code CODE is a
10090 widening operation that is supported by the target platform in
10091 vector form (i.e., when operating on arguments of type VECTYPE_IN
10092 producing a result of type VECTYPE_OUT).
10094 Widening operations we currently support are NOP (CONVERT), FLOAT,
10095 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10096 are supported by the target platform either directly (via vector
10097 tree-codes), or via target builtins.
10100 - CODE1 and CODE2 are codes of vector operations to be used when
10101 vectorizing the operation, if available.
10102 - MULTI_STEP_CVT determines the number of required intermediate steps in
10103 case of multi-step conversion (like char->short->int - in that case
10104 MULTI_STEP_CVT will be 1).
10105 - INTERM_TYPES contains the intermediate type required to perform the
10106 widening operation (short in the above example). */
/* NOTE(review): this excerpt appears to have lost a number of physical
   source lines — the function's return type and opening brace, the
   switch (code) header, several case bodies, the declaration of the
   loop counter `i`, and most closing braces / `return false;` lines
   are not visible.  The comments below describe only what is visible;
   confirm against the full file before relying on them.  */
10109 supportable_widening_operation (enum tree_code code
, stmt_vec_info stmt_info
,
10110 tree vectype_out
, tree vectype_in
,
10111 enum tree_code
*code1
, enum tree_code
*code2
,
10112 int *multi_step_cvt
,
10113 vec
<tree
> *interm_types
)
10115 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10116 struct loop
*vect_loop
= NULL
;
10117 machine_mode vec_mode
;
10118 enum insn_code icode1
, icode2
;
10119 optab optab1
, optab2
;
10120 tree vectype
= vectype_in
;
10121 tree wide_vectype
= vectype_out
;
10122 enum tree_code c1
, c2
;
10124 tree prev_type
, intermediate_type
;
10125 machine_mode intermediate_mode
, prev_mode
;
10126 optab optab3
, optab4
;
/* Assume a single-step conversion until the multi-step search below
   proves otherwise.  */
10128 *multi_step_cvt
= 0;
10130 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map the scalar widening CODE to a pair of vector tree codes C1/C2 —
   presumably via a switch on CODE whose header is not visible here
   (TODO confirm against the full source).  */
10134 case WIDEN_MULT_EXPR
:
10135 /* The result of a vectorized widening operation usually requires
10136 two vectors (because the widened results do not fit into one vector).
10137 The generated vector results would normally be expected to be
10138 generated in the same order as in the original scalar computation,
10139 i.e. if 8 results are generated in each vector iteration, they are
10140 to be organized as follows:
10141 vect1: [res1,res2,res3,res4],
10142 vect2: [res5,res6,res7,res8].
10144 However, in the special case that the result of the widening
10145 operation is used in a reduction computation only, the order doesn't
10146 matter (because when vectorizing a reduction we change the order of
10147 the computation). Some targets can take advantage of this and
10148 generate more efficient code. For example, targets like Altivec,
10149 that support widen_mult using a sequence of {mult_even,mult_odd}
10150 generate the following vectors:
10151 vect1: [res1,res3,res5,res7],
10152 vect2: [res2,res4,res6,res8].
10154 When vectorizing outer-loops, we execute the inner-loop sequentially
10155 (each vectorized inner-loop iteration contributes to VF outer-loop
10156 iterations in parallel). We therefore don't allow to change the
10157 order of the computation in the inner-loop during outer-loop
vectorization.  */
10159 /* TODO: Another case in which order doesn't *really* matter is when we
10160 widen and then contract again, e.g. (short)((int)x * y >> 8).
10161 Normally, pack_trunc performs an even/odd permute, whereas the
10162 repack from an even/odd expansion would be an interleave, which
10163 would be significantly simpler for e.g. AVX2. */
10164 /* In any case, in order to avoid duplicating the code below, recurse
10165 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10166 are properly set up for the caller. If we fail, we'll continue with
10167 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10169 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
10170 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
10171 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
10172 stmt_info
, vectype_out
,
10173 vectype_in
, code1
, code2
,
10174 multi_step_cvt
, interm_types
))
10176 /* Elements in a vector with vect_used_by_reduction property cannot
10177 be reordered if the use chain with this property does not have the
10178 same operation. One such an example is s += a * b, where elements
10179 in a and b cannot be reordered. Here we check if the vector defined
10180 by STMT is only directly used in the reduction statement. */
10181 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
10182 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
10184 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
/* Otherwise fall back to the ordered LO/HI widening-multiply pair.  */
10187 c1
= VEC_WIDEN_MULT_LO_EXPR
;
10188 c2
= VEC_WIDEN_MULT_HI_EXPR
;
10191 case DOT_PROD_EXPR
:
10192 c1
= DOT_PROD_EXPR
;
10193 c2
= DOT_PROD_EXPR
;
10201 case VEC_WIDEN_MULT_EVEN_EXPR
:
10202 /* Support the recursion induced just above. */
10203 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
10204 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
10207 case WIDEN_LSHIFT_EXPR
:
10208 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
10209 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
10213 c1
= VEC_UNPACK_LO_EXPR
;
10214 c2
= VEC_UNPACK_HI_EXPR
;
10218 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
10219 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
10222 case FIX_TRUNC_EXPR
:
10223 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
10224 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
/* Any other CODE is not a widening operation this function handles.  */
10228 gcc_unreachable ();
/* The LO/HI code pairs depend on target endianness; the even/odd
   multiply pair is explicitly excluded from the swap.  */
10231 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
10232 std::swap (c1
, c2
);
10234 if (code
== FIX_TRUNC_EXPR
)
10236 /* The signedness is determined from output operand. */
10237 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10238 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
10242 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10243 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
10246 if (!optab1
|| !optab2
)
/* Both halves must be implemented for the input vector mode.  */
10249 vec_mode
= TYPE_MODE (vectype
);
10250 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
10251 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step conversion succeeds when both instructions already
   produce the wide vector's mode.  */
10257 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10258 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10259 /* For scalar masks we may have different boolean
10260 vector types having the same QImode. Thus we
10261 add additional check for elements number. */
10262 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10263 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
10264 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10266 /* Check if it's a multi-step conversion that can be done using intermediate
types.  */
10269 prev_type
= vectype
;
10270 prev_mode
= vec_mode
;
10272 if (!CONVERT_EXPR_CODE_P (code
))
10275 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10276 intermediate steps in promotion sequence. We try
10277 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
not.  */
10279 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10280 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10282 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10283 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
/* Boolean vectors step by halving the element count rather than by
   widening an element mode.  */
10285 intermediate_type
= vect_halve_mask_nunits (prev_type
);
10286 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10291 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
10292 TYPE_UNSIGNED (prev_type
));
10294 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10295 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Each step must be supported both out of the previous mode into the
   intermediate mode, and onward from the intermediate mode.  */
10297 if (!optab3
|| !optab4
10298 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
10299 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10300 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
10301 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
10302 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
10303 == CODE_FOR_nothing
)
10304 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
10305 == CODE_FOR_nothing
))
10308 interm_types
->quick_push (intermediate_type
);
10309 (*multi_step_cvt
)++;
10311 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10312 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10313 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10314 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
10315 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10317 prev_type
= intermediate_type
;
10318 prev_mode
= intermediate_mode
;
/* No chain of at most MAX_INTERM_CVT_STEPS steps reached the wide
   type; discard whatever intermediate types were collected.  */
10321 interm_types
->release ();
10326 /* Function supportable_narrowing_operation
10328 Check whether an operation represented by the code CODE is a
10329 narrowing operation that is supported by the target platform in
10330 vector form (i.e., when operating on arguments of type VECTYPE_IN
10331 and producing a result of type VECTYPE_OUT).
10333 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10334 and FLOAT. This function checks if these operations are supported by
10335 the target platform directly via vector tree-codes.
10338 - CODE1 is the code of a vector operation to be used when
10339 vectorizing the operation, if available.
10340 - MULTI_STEP_CVT determines the number of required intermediate steps in
10341 case of multi-step conversion (like int->short->char - in that case
10342 MULTI_STEP_CVT will be 1).
10343 - INTERM_TYPES contains the intermediate type required to perform the
10344 narrowing operation (short in the above example). */
/* NOTE(review): this excerpt is missing several physical source lines —
   the return type and opening brace, the switch (code) header, the
   declarations of `c1', `i' and `uns', and most closing braces /
   `return false;' lines are not visible.  The comments below describe
   only what is visible.  */
10347 supportable_narrowing_operation (enum tree_code code
,
10348 tree vectype_out
, tree vectype_in
,
10349 enum tree_code
*code1
, int *multi_step_cvt
,
10350 vec
<tree
> *interm_types
)
10352 machine_mode vec_mode
;
10353 enum insn_code icode1
;
10354 optab optab1
, interm_optab
;
10355 tree vectype
= vectype_in
;
10356 tree narrow_vectype
= vectype_out
;
10358 tree intermediate_type
, prev_type
;
10359 machine_mode intermediate_mode
, prev_mode
;
/* Assume a single-step conversion until the multi-step search below
   proves otherwise.  */
10363 *multi_step_cvt
= 0;
/* Map the scalar narrowing CODE to a single vector pack code C1.  */
10367 c1
= VEC_PACK_TRUNC_EXPR
;
10370 case FIX_TRUNC_EXPR
:
10371 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
10375 c1
= VEC_PACK_FLOAT_EXPR
;
10379 gcc_unreachable ();
10382 if (code
== FIX_TRUNC_EXPR
)
10383 /* The signedness is determined from output operand. */
10384 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10386 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10391 vec_mode
= TYPE_MODE (vectype
);
10392 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step narrowing succeeds when the instruction produces the
   narrow vector's mode directly.  */
10397 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10398 /* For scalar masks we may have different boolean
10399 vector types having the same QImode. Thus we
10400 add additional check for elements number. */
10401 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10402 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
10403 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10405 if (code
== FLOAT_EXPR
)
10408 /* Check if it's a multi-step conversion that can be done using intermediate
types.  */
10410 prev_mode
= vec_mode
;
10411 prev_type
= vectype
;
/* Signedness of the intermediate integer types: from the output for
   FIX_TRUNC, otherwise from the input.  */
10412 if (code
== FIX_TRUNC_EXPR
)
10413 uns
= TYPE_UNSIGNED (vectype_out
);
10415 uns
= TYPE_UNSIGNED (vectype
);
10417 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10418 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10419 costly than signed. */
10420 if (code
== FIX_TRUNC_EXPR
&& uns
)
10422 enum insn_code icode2
;
10425 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
10427 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10428 if (interm_optab
!= unknown_optab
10429 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
10430 && insn_data
[icode1
].operand
[0].mode
10431 == insn_data
[icode2
].operand
[0].mode
)
/* The signed variant is equally supported; switch to it.  */
10434 optab1
= interm_optab
;
10439 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10440 intermediate steps in promotion sequence. We try
10441 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10442 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10443 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10445 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10446 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
/* Boolean vectors step by doubling the element count rather than by
   narrowing an element mode.  */
10448 intermediate_type
= vect_double_mask_nunits (prev_type
);
10449 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10454 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
10456 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
/* Each step must be supported from the previous mode into the
   intermediate mode, and onward from the intermediate mode.  */
10459 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
10460 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10461 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
10462 == CODE_FOR_nothing
))
10465 interm_types
->quick_push (intermediate_type
);
10466 (*multi_step_cvt
)++;
10468 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10469 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10470 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
10471 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10473 prev_mode
= intermediate_mode
;
10474 prev_type
= intermediate_type
;
10475 optab1
= interm_optab
;
/* No chain of at most MAX_INTERM_CVT_STEPS steps reached the narrow
   type; discard whatever intermediate types were collected.  */
10478 interm_types
->release ();
10482 /* Generate and return a statement that sets vector mask MASK such that
10483 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10486 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
10488 tree cmp_type
= TREE_TYPE (start_index
);
10489 tree mask_type
= TREE_TYPE (mask
);
10490 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
10491 cmp_type
, mask_type
,
10492 OPTIMIZE_FOR_SPEED
));
10493 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
10494 start_index
, end_index
,
10495 build_zero_cst (mask_type
));
10496 gimple_call_set_lhs (call
, mask
);
10500 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10501 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10504 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
10507 tree tmp
= make_ssa_name (mask_type
);
10508 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
10509 gimple_seq_add_stmt (seq
, call
);
10510 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
10513 /* Try to compute the vector types required to vectorize STMT_INFO,
10514 returning true on success and false if vectorization isn't possible.
10518 - Set *STMT_VECTYPE_OUT to:
10519 - NULL_TREE if the statement doesn't need to be vectorized;
10520 - boolean_type_node if the statement is a boolean operation whose
10521 vector type can only be determined once all the other vector types
10523 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10525 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10526 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10527 statement does not help to determine the overall number of units. */
/* NOTE(review): this excerpt is missing several physical source lines —
   the opt_result return type, opening brace, the declaration of
   `vectype', some else-branches and closing braces are not visible.
   The comments below describe only what is visible.  */
10530 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
10531 tree
*stmt_vectype_out
,
10532 tree
*nunits_vectype_out
)
10534 gimple
*stmt
= stmt_info
->stmt
;
/* Start with both outputs cleared; they are only filled in on the
   success paths below.  */
10536 *stmt_vectype_out
= NULL_TREE
;
10537 *nunits_vectype_out
= NULL_TREE
;
10539 if (gimple_get_lhs (stmt
) == NULL_TREE
10540 /* MASK_STORE has no lhs, but is ok. */
10541 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10543 if (is_a
<gcall
*> (stmt
))
10545 /* Ignore calls with no lhs. These must be calls to
10546 #pragma omp simd functions, and what vectorization factor
10547 it really needs can't be determined until
10548 vectorizable_simd_clone_call. */
10549 if (dump_enabled_p ())
10550 dump_printf_loc (MSG_NOTE
, vect_location
,
10551 "defer to SIMD clone analysis.\n");
10552 return opt_result::success ();
/* A non-call statement without an lhs cannot be vectorized.  */
10555 return opt_result::failure_at (stmt
,
10556 "not vectorized: irregular stmt.%G", stmt
);
/* Statements that already operate on vectors are not handled.  */
10559 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
10560 return opt_result::failure_at (stmt
,
10561 "not vectorized: vector stmt in loop:%G",
10565 tree scalar_type
= NULL_TREE
;
/* Reuse a previously-computed vectype when one is recorded.  */
10566 if (STMT_VINFO_VECTYPE (stmt_info
))
10567 *stmt_vectype_out
= vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10570 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
/* For a masked store the stored value is argument 3; otherwise use the
   lhs type as the scalar type.  */
10571 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10572 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
10574 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
10576 /* Pure bool ops don't participate in number-of-units computation.
10577 For comparisons use the types being compared. */
10578 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
)
10579 && is_gimple_assign (stmt
)
10580 && gimple_assign_rhs_code (stmt
) != COND_EXPR
)
10582 *stmt_vectype_out
= boolean_type_node
;
10584 tree rhs1
= gimple_assign_rhs1 (stmt
);
10585 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10586 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10587 scalar_type
= TREE_TYPE (rhs1
);
10590 if (dump_enabled_p ())
10591 dump_printf_loc (MSG_NOTE
, vect_location
,
10592 "pure bool operation.\n");
10593 return opt_result::success ();
10597 if (dump_enabled_p ())
10598 dump_printf_loc (MSG_NOTE
, vect_location
,
10599 "get vectype for scalar type: %T\n", scalar_type
);
10600 vectype
= get_vectype_for_scalar_type (scalar_type
);
10602 return opt_result::failure_at (stmt
,
10604 " unsupported data-type %T\n",
10607 if (!*stmt_vectype_out
)
10608 *stmt_vectype_out
= vectype
;
10610 if (dump_enabled_p ())
10611 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
10614 /* Don't try to compute scalar types if the stmt produces a boolean
10615 vector; use the existing vector type instead. */
10616 tree nunits_vectype
;
10617 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10618 nunits_vectype
= vectype
;
10621 /* The number of units is set according to the smallest scalar
10622 type (or the largest vector size, but we only support one
10623 vector size per vectorization). */
10624 if (*stmt_vectype_out
!= boolean_type_node
)
10626 HOST_WIDE_INT dummy
;
10627 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
10630 if (dump_enabled_p ())
10631 dump_printf_loc (MSG_NOTE
, vect_location
,
10632 "get vectype for scalar type: %T\n", scalar_type
);
10633 nunits_vectype
= get_vectype_for_scalar_type (scalar_type
);
10635 if (!nunits_vectype
)
10636 return opt_result::failure_at (stmt
,
10637 "not vectorized: unsupported data-type %T\n",
/* The statement's own vectype and the nunits vectype must have the
   same vector size — only one vector size per loop is supported.  */
10640 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
10641 GET_MODE_SIZE (TYPE_MODE (nunits_vectype
))))
10642 return opt_result::failure_at (stmt
,
10643 "not vectorized: different sized vector "
10644 "types in statement, %T and %T\n",
10645 vectype
, nunits_vectype
);
10647 if (dump_enabled_p ())
10649 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n",
10652 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
10653 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
10654 dump_printf (MSG_NOTE
, "\n");
10657 *nunits_vectype_out
= nunits_vectype
;
10658 return opt_result::success ();
10661 /* Try to determine the correct vector type for STMT_INFO, which is a
10662 statement that produces a scalar boolean result. Return the vector
10663 type on success, otherwise return NULL_TREE. */
/* NOTE(review): this excerpt is missing several physical source lines —
   the opt_tree return type, opening brace, the declarations of `rhs'
   and `iter' for the operand walk, some if-headers and closing braces
   are not visible.  The comments below describe only what is visible.  */
10666 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info
)
10668 gimple
*stmt
= stmt_info
->stmt
;
10669 tree mask_type
= NULL
;
10670 tree vectype
, scalar_type
;
/* For a comparison of non-boolean operands, derive the mask type
   directly from the compared scalar type.  */
10672 if (is_gimple_assign (stmt
)
10673 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10674 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt
))))
10676 scalar_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
10677 mask_type
= get_mask_type_for_scalar_type (scalar_type
);
10680 return opt_tree::failure_at (stmt
,
10681 "not vectorized: unsupported mask\n");
10687 enum vect_def_type dt
;
/* Otherwise infer the mask type from the SSA use operands, requiring
   all of them to agree.  */
10689 FOR_EACH_SSA_TREE_OPERAND (rhs
, stmt
, iter
, SSA_OP_USE
)
10691 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &dt
, &vectype
))
10692 return opt_tree::failure_at (stmt
,
10693 "not vectorized:can't compute mask"
10694 " type for statement, %G", stmt
);
10696 /* No vectype probably means external definition.
10697 Allow it in case there is another operand which
10698 allows to determine mask type. */
10703 mask_type
= vectype
;
10704 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type
),
10705 TYPE_VECTOR_SUBPARTS (vectype
)))
10706 return opt_tree::failure_at (stmt
,
10707 "not vectorized: different sized mask"
10708 " types in statement, %T and %T\n",
10709 mask_type
, vectype
);
10710 else if (VECTOR_BOOLEAN_TYPE_P (mask_type
)
10711 != VECTOR_BOOLEAN_TYPE_P (vectype
))
10712 return opt_tree::failure_at (stmt
,
10713 "not vectorized: mixed mask and "
10714 "nonmask vector types in statement, "
10716 mask_type
, vectype
);
10719 /* We may compare boolean value loaded as vector of integers.
10720 Fix mask_type in such case. */
10722 && !VECTOR_BOOLEAN_TYPE_P (mask_type
)
10723 && gimple_code (stmt
) == GIMPLE_ASSIGN
10724 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
)
10725 mask_type
= build_same_sized_truth_vector_type (mask_type
);
10728 /* No mask_type should mean loop invariant predicate.
10729 This is probably a subject for optimization in if-conversion. */
10731 return opt_tree::failure_at (stmt
,
10732 "not vectorized: can't compute mask type "
10733 "for statement: %G", stmt
);
10735 return opt_tree::success (mask_type
);