/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
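
/* E.g. a caller-side sketch (illustrative only): costing NCOPIES copies of
   a plain vector statement in the loop body would look like

     unsigned cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
				       stmt_info, 0, vect_body);

   where the returned value is only a preliminary estimate; the final cost
   is decided when COST_VEC is handed to the target cost model.  */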
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}
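
/* For example (illustrative sketch), clobbering a temporary array VAR
   emits the GIMPLE statement

     var = {CLOBBER};

   which tells later passes that VAR's storage is dead past this point.  */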
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
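
/* Illustrative example (not exhaustive): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the store to a[i] is relevant here because it has a vdef, while the
   increment of i on its own is not; it only feeds address computation
   and is handled by the induction machinery instead.  */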
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- STMT: SSA_NAME = var
     -2- STMT: var = SSA_NAME
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
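
/* For example (illustrative), in the copy stmt 'a[i] = x_1' the use of
   'x_1' is the copied value, so this returns true for it, whereas 'i' is
   only used to index 'a' and would yield false.  */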
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!dstmt_vinfo)
    return true;

  def_bb = gimple_bb (dstmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     DSTMT_VINFO must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
     check that everything is as expected, and we are done.  */
  bb = gimple_bb (stmt_vinfo->stmt);
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi_info->stmt, 0);
	    }

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");
	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");
	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				    loop_vinfo, relevant, &worklist, false)
		      || !process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				       loop_vinfo, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  if (!process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt_vinfo, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ???  We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ???  We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ???  Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ???  This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
				       vec_promote_demote, stmt_info, 0,
				       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ???  Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operation for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ???  Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (first_stmt_info)
		      - DR_GROUP_GAP (first_stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operation or shuffle operation
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");
	break;
      }

    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt_info, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}
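
/* For example (sketch), splatting the invariant scalar 'x_1' into a V4SI
   vector emits in the preheader something like

     cst_2 = {x_1, x_1, x_1, x_1};

   and returns cst_2 for use in the vectorized statement.  */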
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
   with type DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
				enum vect_def_type dt)
{
  tree vec_oprnd;
  stmt_vec_info vec_stmt_info;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt_info
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt_info = (STMT_VINFO_VEC_STMT
			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt_info);
	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
	  vec_oprnd = PHI_RESULT (phi);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
	  vec_oprnd = PHI_RESULT (phi);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a (vector) def
   that will be used in the vectorized stmt for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);

  return vec_stmt_info;
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
  gsi_replace (&gsi, vec_stmt, false);

  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
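
/* E.g. for vectorizable_internal_function above (illustrative): a call
   to sqrt in the loop maps to CFN_SQRT; with vectype_out == vectype_in
   == V2DF it returns IFN_SQRT when the target provides a direct optab
   for that mode, and IFN_LAST otherwise.  */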

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			  vec_load_store_type vls_type, int group_size,
			  vect_memory_access_type memory_access_type,
			  gather_scatter_info *gs_info)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      tree offset_type = TREE_TYPE (gs_info->offset);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   TYPE_SIGN (offset_type),
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because an access"
			 " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!(targetm.vectorize.get_mask_mode
	(GET_MODE_NUNITS (vecmode),
	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because the target"
			 " doesn't have the appropriate masked load or"
			 " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }
  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
  else
    gcc_unreachable ();
}

/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
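
/* For instance, with loop mask loop_mask_8 and condition mask vec_mask_6
   (illustrative SSA names), this emits

     vec_mask_and_9 = vec_mask_6 & loop_mask_8;

   before GSI and returns vec_mask_and_9, making each conditional access
   subject to both masks.  */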

/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* See whether we can calculate (COUNT - 1) * STEP / SCALE
	 in OFFSET_BITS bits.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      if (wi::min_precision (range, sign) > element_bits)
	{
	  overflow = wi::OVF_UNKNOWN;
	  continue;
	}

      /* See whether the target supports the operation.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
				     memory_type, element_bits, sign, scale,
				     &gs_info->ifn, &gs_info->element_type))
	continue;

      tree offset_type = build_nonstandard_integer_type (element_bits,
							 sign == UNSIGNED);

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->offset_vectype = NULL_TREE;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
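
/* Worked example (illustrative): for 32-bit elements, a constant DR_STEP
   of 20 bytes and SCALE 4, FACTOR is 5.  If the loop runs at most 101
   scalar iterations then COUNT is 100 and RANGE is 500, which needs well
   under 32 bits, so the offset vector { 0, 5, 10, ... } is representable
   exactly in 32-bit offsets with scale 4.  */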

/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);

  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int offset_bits = TYPE_PRECISION (offset_type);

  /* Enforced by vect_check_gather_scatter.  */
  gcc_assert (element_bits >= offset_bits);

  /* If the elements are wider than the offset, convert the offset to the
     same width, without changing its sign.  */
  if (element_bits > offset_bits)
    {
      bool unsigned_p = TYPE_UNSIGNED (offset_type);
      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
      gs_info->offset = fold_convert (offset_type, gs_info->offset);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}

/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (stmt_vec_info stmt_info)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr)->step,
			       size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
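
/* For example, for V8HI the single stepped pattern pushes 7, 6, 5 and
   extrapolates to the full selector { 7, 6, 5, 4, 3, 2, 1, 0 }, so
   VEC_PERM_EXPR <v, v, mask> with this mask reverses the eight elements
   of V.  */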

/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}

/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
			   bool masked_p, vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type,
			   gather_scatter_info *gs_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  data_reference *first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
  bool single_element_p = (stmt_info == first_stmt_info
			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
			&& vls_type == VLS_LOAD
			&& loop_vinfo
			&& !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
	{
	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (multiple_p (nunits, group_size))
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
	  if (overrun_p
	      && gap < (vect_known_alignment_in_bytes (first_dr)
			/ vect_get_scalar_dr_size (first_dr)))
	    overrun_p = false;
	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  *memory_access_type = VMAT_CONTIGUOUS;
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
      if (would_overrun_p
	  && !masked_p
	  && gap < (vect_known_alignment_in_bytes (first_dr)
		    / vect_get_scalar_dr_size (first_dr)))
	would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (stmt_info) > 0)
	{
	  /* First cope with the degenerate case of a single-element
	     vector.  */
	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
	    *memory_access_type = VMAT_CONTIGUOUS;

	  /* Otherwise try using LOAD/STORE_LANES.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
		  : vect_store_lanes_supported (vectype, group_size,
						masked_p)))
	    {
	      *memory_access_type = VMAT_LOAD_STORE_LANES;
	      overrun_p = would_overrun_p;
	    }

	  /* If that fails, try using permuting loads.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_grouped_load_supported (vectype, single_element_p,
						 group_size)
		  : vect_grouped_store_supported (vectype, group_size)))
	    {
	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
	      overrun_p = would_overrun_p;
	    }
	}

      /* As a last resort, try using a gather load or scatter store.

	 ??? Although the code can handle all group sizes correctly,
	 it probably isn't a win to use separate strided accesses based
	 on nearby locations.  Or, even if it's a win over scalar code,
	 it might not be a win over vectorizing at a lower VF, if that
	 allows us to use contiguous accesses.  */
      if (*memory_access_type == VMAT_ELEMENTWISE
	  && single_element_p
	  && loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
    }

  if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt_info)
	{
	  tree op = vect_get_store_rhs (next_stmt_info);
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
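
/* Worked example (illustrative): a load group with DR_GROUP_SIZE 3 that
   only uses the first two elements of each group has GAP 1.  If the
   first access is known to be 16-byte aligned and the elements are 4
   bytes wide, then GAP (1) < 16 / 4 and reading the gap element cannot
   stray beyond the aligned block that holds a real element, so the
   overrun is safe without a scalar epilogue.  */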

/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}

/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
		     bool masked_p, vec_load_store_type vls_type,
		     unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
				      vls_type, memory_access_type, gs_info))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      if (loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
      else
	*memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt_info);
      if (cmp < 0)
	*memory_access_type = get_negative_load_store_type
	  (stmt_info, vectype, vls_type, ncopies);
      else if (cmp == 0)
	{
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	}
      else
	*memory_access_type = VMAT_CONTIGUOUS;
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Not using elementwise accesses due to variable "
			 "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info)
      && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}

/* Return true if boolean argument MASK is suitable for vectorizing
   conditional load or store STMT_INFO.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

static bool
vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
			    vect_def_type *mask_dt_out,
			    tree *mask_vectype_out)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not an SSA name.\n");
      return false;
    }

  enum vect_def_type mask_dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vector mask type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION,
		       " does not match vector data type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
	}
      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  return true;
}

/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}

/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}

/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt_info, merge, vectype, NULL);
}

/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition, otherwise MASK is null.  */

static void
vect_build_gather_load_calls (stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      stmt_vec_info *vec_stmt,
			      gather_scatter_info *gs_info,
			      tree mask)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  gcc_checking_assert (types_compatible_p (srctype, rettype)
		       && (!mask || types_compatible_p (srctype, masktype)));

  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
					      indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
	sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask)
	{
	  for (int i = 0; i < count; ++i)
	    sel[i] = i | (count / 2);
	  indices.new_vector (sel, 2, count);
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
	}
    }
  else
    gcc_unreachable ();

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;
  stmt_vec_info prev_stmt_info = NULL;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (stmt_info, rettype);
      mask_op = vect_build_all_ones_mask (stmt_info, masktype);
    }

  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt_info, gsi);
      else if (j == 0)
	op = vec_oprnd0
	  = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
      else
	op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
							  vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt_info, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
	      else
		vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
							   vec_mask);

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert
		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
			       TYPE_VECTOR_SUBPARTS (masktype)));
		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  gassign *new_stmt
		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  mask_op = var;
		}
	    }
	  src_op = mask_op;
	}

      gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
					   mask_op, scale);

      stmt_vec_info new_stmt_info;
      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				TYPE_VECTOR_SUBPARTS (rettype)));
	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
	  gimple_call_set_lhs (new_call, op);
	  vect_finish_stmt_generation (stmt_info, new_call, gsi);
	  var = make_ssa_name (vec_dest);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_call);
	  gimple_call_set_lhs (new_call, var);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_call, gsi);
	}

      if (modifier == NARROW)
	{
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (prev_res, var, perm_mask,
				      stmt_info, gsi);
	  new_stmt_info = loop_vinfo->lookup_def (var);
	}

      if (prev_stmt_info == NULL_STMT_VEC_INFO)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;
    }
}

/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
			     gather_scatter_info *gs_info,
			     tree *dataref_ptr, tree *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
					      offset_vectype);
}

/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple_seq stmts;

  tree bump = size_binop (MULT_EXPR,
			  fold_convert (sizetype, DR_STEP (dr)),
			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  offset_type = TREE_TYPE (offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);

  /* Create {0, X, X*2, X*3, ...}.  */
  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
			      build_zero_cst (offset_type), step);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
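
/* Worked example (illustrative): with DR_STEP 8 bytes, SCALE 4 and four
   elements per vector, X = 8 / 4 = 2, *VEC_OFFSET is the series
   { 0, 2, 4, 6 } and *DATAREF_BUMP is 8 * 4 = 32 bytes per copy of the
   statement.  */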

/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
			     vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (dr)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
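
/* E.g. for a contiguous V4SI access, AGGR_TYPE is the 16-byte vector
   type, so the increment is 16; with a negative DR step it is negated
   to -16 so that the pointer walks downwards through memory.  */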

/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */

static bool
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    tree vectype_in, stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  gcall *stmt = as_a <gcall *> (stmt_info->stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  unsigned ncopies;
  unsigned HOST_WIDE_INT nunits, num_bytes;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return false;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
    return false;

  unsigned word_bytes = num_bytes / nunits;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      if (! slp_node)
	{
	  record_stmt_cost (cost_vec,
			    1, vector_stmt, stmt_info, 0, vect_prologue);
	  record_stmt_cost (cost_vec,
			    ncopies, vec_perm, stmt_info, 0, vect_body);
	}
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  stmt_vec_info new_stmt_info = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  gimple *new_stmt;
	  tree tem = make_ssa_name (char_vectype);
	  new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						       char_vectype, vop));
	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  tree tem2 = make_ssa_name (char_vectype);
	  new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
					  tem, tem, bswap_vconst);
	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  tem = make_ssa_name (vectype);
	  new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						       vectype, tem2));
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds.release ();
  return true;
}
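
/* Selector sketch (illustrative): for __builtin_bswap32 on V4SI viewed
   as V16QI, WORD_BYTES is 4 and the three stepped patterns expand to
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. a
   byte reversal within each 32-bit word.  */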

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt,
					&interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}

/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   stmt_vec_info *vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  enum vect_def_type dt[4]
    = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
	vect_unknown_def_type };
  int ndts = ARRAY_SIZE (dt);
  int ncopies, j;
  auto_vec<tree, 8> vargs;
  auto_vec<tree, 8> orig_vargs;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT_INFO a vectorizable call?   */
  stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than four arguments; we do not
     have interesting builtin functions to vectorize with more than two
     arguments except for fma (and masked internal calls, which carry an
     extra mask argument).  No arguments is also not good.  */
  if (nargs == 0 || nargs > 4)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  combined_fn cfn = gimple_call_combined_fn (stmt);
  if (cfn == CFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  int mask_opno = -1;
  if (internal_fn_p (cfn))
    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      /* Skip the mask argument to an internal function.  This operand
	 has been converted via a pattern if necessary.  */
      if ((int) i == mask_opno)
	continue;

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
	  || (modifier == NARROW
	      && simple_integer_narrowing (vectype_out, vectype_in,
					   &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
					  vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
	fndecl = targetm.vectorize.builtin_vectorized_function
	  (cfn, vectype_out, vectype_in);
      else if (callee)
	fndecl = targetm.vectorize.builtin_md_vectorized_function
	  (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else if (modifier == NONE
	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
	return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
				   vectype_in, cost_vec);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_call");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
	record_stmt_cost (cost_vec, ncopies / 2,
			  vec_promote_demote, stmt_info, 0, vect_body);

      if (loop_vinfo && mask_opno >= 0)
	{
	  unsigned int nvectors = (slp_node
				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
				   : ncopies);
	  vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
	}
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  stmt_vec_info new_stmt_info = NULL;
  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      vargs.safe_grow (nargs);
      orig_vargs.safe_grow (nargs);
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs[i] = gimple_call_arg (stmt, i);
	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  if (modifier == NARROW)
		    {
		      /* We don't define any narrowing conditional functions
			 at present.  */
		      gcc_assert (mask_opno < 0);
		      tree half_res = make_ssa_name (vectype_in);
		      gcall *call
			= gimple_build_call_internal_vec (ifn, vargs);
		      gimple_call_set_lhs (call, half_res);
		      gimple_call_set_nothrow (call, true);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, call, gsi);
		      if ((i & 1) == 0)
			{
			  prev_res = half_res;
			  continue;
			}
		      new_temp = make_ssa_name (vec_dest);
		      gimple *new_stmt
			= gimple_build_assign (new_temp, convert_code,
					       prev_res, half_res);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
		    }
		  else
		    {
		      if (mask_opno >= 0 && masked_loop_p)
			{
			  unsigned int vec_num = vec_oprnds0.length ();
			  /* Always true for SLP.  */
			  gcc_assert (ncopies == 1);
			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
							  vectype_out, i);
			  vargs[mask_opno] = prepare_load_store_mask
			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
			}

		      gcall *call;
		      if (ifn != IFN_LAST)
			call = gimple_build_call_internal_vec (ifn, vargs);
		      else
			call = gimple_build_call_vec (fndecl, vargs);
		      new_temp = make_ssa_name (vec_dest, call);
		      gimple_call_set_lhs (call, new_temp);
		      gimple_call_set_nothrow (call, true);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, call, gsi);
		    }
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt_info);
	      else
		vec_oprnd0
		  = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);

	      orig_vargs[i] = vargs[i] = vec_oprnd0;
	    }

	  if (mask_opno >= 0 && masked_loop_p)
	    {
	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
					      vectype_out, j);
	      vargs[mask_opno]
		= prepare_load_store_mask (TREE_TYPE (mask), mask,
					   vargs[mask_opno], gsi);
	    }

	  if (cfn == CFN_GOMP_SIMD_LANE)
	    {
	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
	      tree new_var
		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
	      gimple *init_stmt = gimple_build_assign (new_var, cst);
	      vect_init_vector_1 (stmt_info, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest);
	      gimple *new_stmt = gimple_build_assign (new_temp, new_var);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  else if (modifier == NARROW)
	    {
	      /* We don't define any narrowing conditional functions at
		 present.  */
	      gcc_assert (mask_opno < 0);
	      tree half_res = make_ssa_name (vectype_in);
	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
	      gimple_call_set_lhs (call, half_res);
	      gimple_call_set_nothrow (call, true);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, call, gsi);
	      if ((j & 1) == 0)
		{
		  prev_res = half_res;
		  continue;
		}
	      new_temp = make_ssa_name (vec_dest);
	      gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
						       prev_res, half_res);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  else
	    {
	      gcall *call;
	      if (ifn != IFN_LAST)
		call = gimple_build_call_internal_vec (ifn, vargs);
	      else
		call = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, call);
	      gimple_call_set_lhs (call, new_temp);
	      gimple_call_set_nothrow (call, true);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, call, gsi);
	    }

	  if (j == (modifier == NARROW ? 1 : 0))
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	  prev_stmt_info = new_stmt_info;
	}
    }
  else if (modifier == NARROW)
    {
      /* We don't define any narrowing conditional functions at present.  */
      gcc_assert (mask_opno < 0);
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  gcall *call;
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, call, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt_info);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
						2 * i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	  prev_stmt_info = new_stmt_info;
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (is_pattern_stmt_p (stmt_info))
    stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
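
/* NARROW sketch (illustrative names): when the result elements are half
   the width of the inputs, each output vector is built from two
   internal-function calls on consecutive input vectors, combined with
   the pack code found by simple_integer_narrowing:

     half_1 = IFN_XYZ (a_1);   /+ IFN_XYZ is a placeholder +/
     half_2 = IFN_XYZ (a_2);
     out_3 = VEC_PACK_TRUNC_EXPR <half_1, half_2>;  */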

struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
      else
	return;
    }
}
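
/* For example (illustrative SSA names): given

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     p_3 = &base_array + _2;

   the walk above finds LINEAR_STEP 4 with base &base_array, so P_3 is
   linear within the simd lane even though it is not a loop
   induction.  */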

/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   number of elements.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
3729 /* Function vectorizable_simd_clone_call.
3731 Check if STMT_INFO performs a function call that can be vectorized
3732 by calling a simd clone of the function.
3733 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3734 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3735 Return true if STMT_INFO is vectorizable in this way. */
3738 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3739 gimple_stmt_iterator
*gsi
,
3740 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3741 stmt_vector_for_cost
*)
3746 tree vec_oprnd0
= NULL_TREE
;
3747 stmt_vec_info prev_stmt_info
;
3749 unsigned int nunits
;
3750 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3751 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3752 vec_info
*vinfo
= stmt_info
->vinfo
;
3753 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3754 tree fndecl
, new_temp
;
3756 auto_vec
<simd_call_arg_info
> arginfo
;
3757 vec
<tree
> vargs
= vNULL
;
3759 tree lhs
, rtype
, ratype
;
3760 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3762 /* Is STMT a vectorizable call? */
3763 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3767 fndecl
= gimple_call_fndecl (stmt
);
3768 if (fndecl
== NULL_TREE
)
3771 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3772 if (node
== NULL
|| node
->simd_clones
== NULL
)
3775 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3778 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3782 if (gimple_call_lhs (stmt
)
3783 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3786 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3788 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3790 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
			       &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
	{
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step
	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
	  thisarginfo.op
	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
	       == boolean_true_node);
	  /* If loop has been peeled for alignment, we need to adjust it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	    {
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
	      thisarginfo.op
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);
	    }
	}
      else if (!vec_stmt
	       && thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && loop_vinfo
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
			     &iv, false)
	       && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && !vec_stmt
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not considering SIMD clones; not yet supported"
			 " for variable-width vectors.\n");
      return false;
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen > vf
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen < vf)
	  this_badness += (exact_log2 (vf)
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		gcc_unreachable ();
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }
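
  /* The loop above scores each candidate clone: every halving of
     simdlen relative to the vectorization factor costs 1024, an
     inbranch (masked) clone costs 2048, any target-reported penalty is
     scaled by 512, and passing an invariant or linear operand in a
     vector argument costs 64; the clone with the lowest total badness
     wins.  */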

  if (bestn == NULL)
    return false;

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	arginfo[i].vectype
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
								     i)));
	if (arginfo[i].vectype == NULL
	    || (simd_clone_subparts (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
	if ((bestn->simdclone->args[i].arg_type
	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
	    || (bestn->simdclone->args[i].arg_type
		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
	  {
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
								      + 1);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
		       ? size_type_node : TREE_TYPE (arginfo[i].op);
	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
	    tree sll = arginfo[i].simd_lane_linear
		       ? boolean_true_node : boolean_false_node;
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
	  }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / simd_clone_subparts (atype);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (simd_clone_subparts (atype)
		      < simd_clone_subparts (arginfo[i].vectype))
		    {
		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (simd_clone_subparts (arginfo[i].vectype)
			   / simd_clone_subparts (atype));
		      gcc_assert ((k & (k - 1)) == 0);
		      if (m == 0)
			vec_oprnd0
			  = vect_get_vec_def_for_operand (op, stmt_info);
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (vinfo,
								vec_oprnd0);
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  bitsize_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      gassign *new_stmt
			= gimple_build_assign (make_ssa_name (atype),
					       vec_oprnd0);
		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      k = (simd_clone_subparts (atype)
			   / simd_clone_subparts (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
		      if (k != 1)
			vec_alloc (ctor_elts, k);
		      else
			ctor_elts = NULL;
		      for (l = 0; l < k; l++)
			{
			  if (m == 0 && l == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_operand (op, stmt_info);
			  else
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (vinfo,
								arginfo[i].op);
			  arginfo[i].op = vec_oprnd0;
			  if (k == 1)
			    break;
			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
						  vec_oprnd0);
			}
		      if (k == 1)
			vargs.safe_push (vec_oprnd0);
		      else
			{
			  vec_oprnd0 = build_constructor (atype, ctor_elts);
			  gassign *new_stmt
			    = gimple_build_assign (make_ssa_name (atype),
						   vec_oprnd0);
			  vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
			  vargs.safe_push (gimple_assign_lhs (new_stmt));
			}
		    }
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
	      if (j == 0)
		{
		  gimple_seq stmts;
		  arginfo[i].op
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
					    NULL_TREE);
		  if (stmts != NULL)
		    {
		      basic_block new_bb;
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		    }
		  if (arginfo[i].simd_lane_linear)
		    {
		      vargs.safe_push (arginfo[i].op);
		      break;
		    }
		  tree phi_res = copy_ssa_name (op);
		  gphi *new_phi = create_phi_node (phi_res, loop->header);
		  loop_vinfo->add_stmt (new_phi);
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       ncopies * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op);
		  gassign *new_stmt
		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  loop_vinfo->add_stmt (new_stmt);
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
			       UNKNOWN_LOCATION);
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		}
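	      /* In effect (a sketch) the code above materializes:
		   phi_res = PHI <arginfo[i].op (preheader),
				  phi_arg (latch)>
		   phi_arg = phi_res + linear_step * ncopies * nunits;
		 so each vectorized iteration passes the value the scalar
		 argument would have at the first lane of its group.  */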
	      else
		{
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       j * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op));
		  gassign *new_stmt
		    = gimple_build_assign (new_temp, code,
					   arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  vargs.safe_push (new_temp);
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
	    default:
	      gcc_unreachable ();
	    }
	}

      gcall *new_call = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
	{
	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
	  if (ratype)
	    new_temp = create_tmp_var (ratype);
	  else if (simd_clone_subparts (vectype)
		   == simd_clone_subparts (rtype))
	    new_temp = make_ssa_name (vec_dest, new_call);
	  else
	    new_temp = make_ssa_name (rtype, new_call);
	  gimple_call_set_lhs (new_call, new_temp);
	}
      stmt_vec_info new_stmt_info
	= vect_finish_stmt_generation (stmt_info, new_call, gsi);

      if (vec_dest)
	{
	  if (simd_clone_subparts (vectype) < nunits)
	    {
	      unsigned int k, l;
	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
	      k = nunits / simd_clone_subparts (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		{
		  tree t;
		  if (ratype)
		    {
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t), l * bytes));
		    }
		  else
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				bitsize_int (prec), bitsize_int (l * prec));
		  gimple *new_stmt
		    = gimple_build_assign (make_ssa_name (vectype), t);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info)
		      = *vec_stmt = new_stmt_info;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

		  prev_stmt_info = new_stmt_info;
		}

	      if (ratype)
		vect_clobber_variable (stmt_info, gsi, new_temp);
	      continue;
	    }
	  else if (simd_clone_subparts (vectype) > nunits)
	    {
	      unsigned int k = (simd_clone_subparts (vectype)
				/ simd_clone_subparts (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
	      if (ratype)
		{
		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
		  for (m = 0; m < o; m++)
		    {
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
		      gimple *new_stmt
			= gimple_build_assign (make_ssa_name (rtype), tem);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		    }
		  vect_clobber_variable (stmt_info, gsi, new_temp);
		}
	      else
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
		continue;
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
	      gimple *new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	      prev_stmt_info = new_stmt_info;
	      continue;
	    }
	  else if (ratype)
	    {
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
	      gimple *new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), t);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      vect_clobber_variable (stmt_info, gsi, new_temp);
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  gimple *new_stmt;
  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
      else
	lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}

/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST respectively, and whose
   arguments are VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be
   inserted at GSI.  In the case that CODE is a CALL_EXPR, this means that
   a call to DECL needs to be created (DECL is a function-decl of a
   target-builtin).  STMT_INFO is the original scalar stmt that we are
   vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return new_stmt;
}
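
/* E.g. widening V8HI operands to V4SI results takes two calls to this
   function, one with the "lo" code or builtin and one with the "hi"
   one, each producing a vector covering half of the input elements
   (a sketch; the actual codes come from
   supportable_widening_operation).  */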

/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
   For the first operand we call vect_get_vec_def_for_operand (with OPRND
   containing scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
			      multi_step_cvt - 1);
}
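
/* Each level of the recursion above pushes two vector defs, so a call
   with MULTI_STEP_CVT == N collects 2 * (N + 1) defs in VEC_OPRNDS; the
   NARROW path in vectorizable_conversion passes
   vect_pow2 (multi_step_cvt) - 1 to gather the 2^(multi_step_cvt + 1)
   operands a multi-step narrowing consumes.  */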

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      stmt_vec_info new_stmt_info
	= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;

	      *prev_stmt_info = new_stmt_info;
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
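
/* E.g. a two-step demotion from V4SI to V16QI (a sketch) first packs
   pairs of V4SI operands into V8HI vectors and then pairs of those
   into V16QI, halving the number of vectors at each level; that is
   why VEC_OPRNDS is truncated to half its length before recursing.  */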

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}

/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
	   && !type_has_mode_precision_p (lhs_type))
	  || (INTEGRAL_TYPE_P (rhs_type)
	      && !type_has_mode_precision_p (rhs_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, vinfo, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "can't convert between boolean and non "
			   "boolean vectors ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (multiple_p (nunits_out, nunits_in))
    modifier = NARROW;
  else
    {
      gcc_checking_assert (multiple_p (nunits_in, nunits_out));
      modifier = WIDEN;
    }
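
  /* E.g. (a sketch): V4SI -> V4SF is NONE; V8HI -> V4SI is WIDEN, each
     input vector yielding two output vectors; V4SI -> V8HI is NARROW,
     two input vectors packing into one output vector.  */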

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  bool found_mode = false;
  scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
  scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
  opt_scalar_mode rhs_mode_iter;

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt_info, vectype_out,
					  vectype_in, &code1, &code2,
					  &multi_step_cvt, &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      fltsz = GET_MODE_SIZE (lhs_mode);
      FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
	{
	  rhs_mode = rhs_mode_iter.require ();
	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
	    break;

	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt_info,
						    vectype_out, cvt_type,
						    &codecvt1, &codecvt2,
						    &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    {
	      found_mode = true;
	      break;
	    }
	}

      if (!found_mode)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      DUMP_VECT_SCOPE ("vectorizable_conversion");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
				  cost_vec);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
					      cost_vec);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
					      cost_vec);
	}
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
			       NULL, slp_node);
	  else
	    vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      stmt_vec_info new_stmt_info;
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  gassign *new_stmt
		    = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		}

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info)
		      = *vec_stmt = new_stmt_info;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
		  prev_stmt_info = new_stmt_info;
		}
	    }
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt_info,
					 &vec_oprnds0, NULL, slp_node);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
				       &vec_oprnds1, slp_node);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1
			  = vect_get_vec_def_for_operand (op1, stmt_info);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1, stmt_info,
						      this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      stmt_vec_info new_stmt_info;
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      gassign *new_stmt
			= gimple_build_assign (new_temp, codecvt1, vop0);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
		    }
		}
	      else
		new_stmt_info = vinfo->lookup_def (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
		  prev_stmt_info = new_stmt_info;
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    gassign *new_stmt
		      = gimple_build_assign (new_temp, codecvt1, vop0);
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  }

		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt_info, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable assignment?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_assignment");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  gassign *new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds.release ();
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
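
/* A caller can use this as a cheap feasibility query, roughly:

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ...  replace the operation by a shift-based pattern stmt ...

   (an illustrative sketch with a hypothetical ITYPE, not a quote of
   any particular caller).  */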

/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  stmt_vec_info op1_def_stmt_info;
  if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
			   &op1_def_stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  stmt_vec_info slpstmt_info;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
	    {
	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
		scalar_shift_arg = false;
	    }
	}

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
	scalar_shift_arg = false;
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }
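
  /* E.g. (a sketch)  a[i] = b[i] << 3  can use a vector-by-scalar
     shift, one count shared by all lanes, while  a[i] = b[i] << c[i]
     needs the vector-by-vector optab with a separate count per
     lane.  */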

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt_info, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt
	      && !vect_worthwhile_without_simd_p (vinfo, code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!vec_stmt
      && !VECTOR_MODE_P (TYPE_MODE (vectype))
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_shift");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	}
      else
	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}

/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that
   can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			stmt_vec_info *vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      target_support_p = (optab_handler (optab, vec_mode)
                          != CODE_FOR_nothing);
    }
  if (!target_support_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
          || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  /* Handle def.  */
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
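
  /* A minimal sketch of how a later stage follows this chain to reach
     the vector def for copy J of a scalar stmt (illustrative only, not
     a helper defined here):

       stmt_vec_info copy_info = STMT_VINFO_VEC_STMT (scalar_info);
       for (unsigned k = 0; k < j; ++k)
         copy_info = STMT_VINFO_RELATED_STMT (copy_info);

     which is essentially what vect_get_vec_def_for_stmt_copy amounts to,
     advancing one RELATED_STMT link per copy.  */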
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op)
            vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else if (op_type == ternary_op)
            {
              if (slp_node)
                {
                  auto_vec<tree> ops(3);
                  ops.quick_push (op0);
                  ops.quick_push (op1);
                  ops.quick_push (op2);
                  auto_vec<vec<tree> > vec_defs(3);
                  vect_get_slp_defs (ops, slp_node, &vec_defs);
                  vec_oprnds0 = vec_defs[0];
                  vec_oprnds1 = vec_defs[1];
                  vec_oprnds2 = vec_defs[2];
                }
              else
                {
                  vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
                                     &vec_oprnds1, NULL);
                  vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
                                     NULL, NULL);
                }
            }
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
                               slp_node);
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
                                                                      vec_oprnd));
            }
        }
      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
          gassign *new_stmt = gimple_build_assign (vec_dest, code,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          new_stmt_info
            = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
          if (vec_cvt_dest)
            {
              new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
              gassign *new_stmt
                = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
                                       new_temp);
              new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              new_stmt_info
                = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
            }
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
/* A helper function to ensure data reference DR's base alignment.  */

static void
ensure_base_align (struct data_reference *dr)
{
  if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    return;

  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
        {
          SET_DECL_ALIGN (base_decl, align_base_to);
          DECL_USER_ALIGN (base_decl) = 1;
        }
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
          != get_alias_set (DR_REF (next_dr)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "conflicting alias set types.\n");
          return ptr_type_node;
        }
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    stmt_vec_info *vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)
{
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  enum vect_def_type rhs_dt = vect_unknown_def_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  stmt_vec_info first_stmt_info;
  bool grouped_store;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree aggr_type;
  gather_scatter_info gs_info;
  poly_uint64 vf;
  vec_load_store_type vls_type;
  tree ref_type;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      tree scalar_dest = gimple_assign_lhs (assign);
      if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
          && is_pattern_stmt_p (stmt_info))
        scalar_dest = TREE_OPERAND (scalar_dest, 0);
      if (TREE_CODE (scalar_dest) != ARRAY_REF
          && TREE_CODE (scalar_dest) != BIT_FIELD_REF
          && TREE_CODE (scalar_dest) != INDIRECT_REF
          && TREE_CODE (scalar_dest) != COMPONENT_REF
          && TREE_CODE (scalar_dest) != IMAGPART_EXPR
          && TREE_CODE (scalar_dest) != REALPART_EXPR
          && TREE_CODE (scalar_dest) != MEM_REF)
        return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
        return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_store_fn_p (ifn))
        return false;

      if (slp_node != NULL)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "SLP of masked stores not supported.\n");
          return false;
        }

      int mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0)
        {
          mask = gimple_call_arg (call, mask_index);
          if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
                                           &mask_vectype))
            return false;
        }
    }

  op = vect_get_store_rhs (stmt_info);
  /* Cannot have hybrid store SLP -- that would mean storing to the
     same location twice.  */
  gcc_assert (slp == PURE_SLP_STMT (stmt_info));

  tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
    return false;

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
                            &memory_access_type, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
        {
          if (!VECTOR_MODE_P (vec_mode)
              || !can_vec_mask_load_store_p (vec_mode,
                                             TYPE_MODE (mask_vectype), false))
            return false;
        }
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
               && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported access type for masked store.\n");
          return false;
        }
    }
  else
    {
      /* FORNOW.  In some cases can vectorize even if data-type not supported
         (e.g. - array initialization with 0).  */
      if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
        return false;
    }

  grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && memory_access_type != VMAT_GATHER_SCATTER
                   && (slp || memory_access_type != VMAT_CONTIGUOUS));
  if (grouped_store)
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
    }
  else
    {
      first_stmt_info = stmt_info;
      first_dr = dr;
      group_size = vec_num = 1;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

      if (loop_vinfo
          && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
        check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
                                  memory_access_type, &gs_info);

      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
                             vls_type, slp_node, cost_vec);
      return true;
    }
  gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  /* Transform.  */

  ensure_base_align (dr);
  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      poly_uint64 scatter_off_nunits
        = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);

      if (known_eq (nunits, scatter_off_nunits))
        modifier = NONE;
      else if (known_eq (nunits * 2, scatter_off_nunits))
        {
          modifier = WIDEN;

          /* Currently gathers and scatters are only supported for
             fixed-length vectors.  */
          unsigned int count = scatter_off_nunits.to_constant ();
          vec_perm_builder sel (count, count, 1);
          for (i = 0; i < (unsigned int) count; ++i)
            sel.quick_push (i | (count / 2));

          vec_perm_indices indices (sel, 1, count);
          perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
                                                  indices);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (known_eq (nunits, scatter_off_nunits * 2))
        {
          modifier = NARROW;

          /* Currently gathers and scatters are only supported for
             fixed-length vectors.  */
          unsigned int count = nunits.to_constant ();
          vec_perm_builder sel (count, count, 1);
          for (i = 0; i < (unsigned int) count; ++i)
            sel.quick_push (i | (count / 2));

          vec_perm_indices indices (sel, 2, count);
          perm_mask = vect_gen_perm_mask_checked (vectype, indices);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);

      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
                           && TREE_CODE (rettype) == VOID_TYPE);
      ptr = fold_convert (ptrtype, gs_info.base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional scatter stores,
         so mask should be all ones.  */
      mask = build_int_cst (masktype, -1);
      mask = vect_init_vector (stmt_info, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gs_info.scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (j == 0)
            {
              src = vec_oprnd1
                = vect_get_vec_def_for_operand (op, stmt_info);
              op = vec_oprnd0
                = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
            }
          else if (modifier != NONE && (j & 1))
            {
              if (modifier == WIDEN)
                {
                  src = vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
                  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
                                             stmt_info, gsi);
                }
              else if (modifier == NARROW)
                {
                  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
                                              stmt_info, gsi);
                  op = vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
                }
              else
                gcc_unreachable ();
            }
          else
            {
              src = vec_oprnd1
                = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
              op = vec_oprnd0
                = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
            }
          if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
            {
              gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
                                    TYPE_VECTOR_SUBPARTS (srctype)));
              var = vect_get_new_ssa_name (srctype, vect_simple_var);
              src = build1 (VIEW_CONVERT_EXPR, srctype, src);
              gassign *new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
              vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              src = var;
            }

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
                                    TYPE_VECTOR_SUBPARTS (idxtype)));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              gassign *new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              op = var;
            }
          gcall *new_stmt
            = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
          stmt_vec_info new_stmt_info
            = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

          if (prev_stmt_info == NULL_STMT_VEC_INFO)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
          prev_stmt_info = new_stmt_info;
        }
      return true;
    }
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;

  if (grouped_store)
    {
      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (DR_GROUP_STORE_COUNT (first_stmt_info)
          < DR_GROUP_SIZE (first_stmt_info)
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
                      == first_stmt_info);
          first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
          op = vect_get_store_rhs (first_stmt_info);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt_info);
    }
  else
    ref_type = reference_alias_ptr_type (DR_REF (first_dr));
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      tree stride_base, stride_step, alias_off;
      tree vec_oprnd;
      unsigned int g;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));

      stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (DR_OFFSET (first_dr)),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
         */
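
      /* A concrete instance (illustrative numbers only): with a
         four-element vectype and stride == 3, one vector of the rhs
         feeds four scalar stores per unrolled iteration:

           for (j = 0; ; j += 4*3)
             {
               vectemp = ...;
               array[j]   = vectemp[0];
               array[j+3] = vectemp[1];
               array[j+6] = vectemp[2];
               array[j+9] = vectemp[3];
             }  */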
      unsigned nstores = const_nunits;
      unsigned lnel = 1;
      tree ltype = elem_type;
      tree lvectype = vectype;
      if (slp)
        {
          if (group_size < const_nunits
              && const_nunits % group_size == 0)
            {
              nstores = const_nunits / group_size;
              lnel = group_size;
              ltype = build_vector_type (elem_type, group_size);
              lvectype = vectype;

              /* First check if vec_extract optab doesn't support extraction
                 of vector elts directly.  */
              scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
              machine_mode vmode;
              if (!mode_for_vector (elmode, group_size).exists (&vmode)
                  || !VECTOR_MODE_P (vmode)
                  || !targetm.vector_mode_supported_p (vmode)
                  || (convert_optab_handler (vec_extract_optab,
                                             TYPE_MODE (vectype), vmode)
                      == CODE_FOR_nothing))
                {
                  /* Try to avoid emitting an extract of vector elements
                     by performing the extracts using an integer type of the
                     same size, extracting from a vector of those and then
                     re-interpreting it as the original vector type if
                     supported.  */
                  unsigned lsize
                    = group_size * GET_MODE_BITSIZE (elmode);
                  elmode = int_mode_for_size (lsize, 0).require ();
                  unsigned int lnunits = const_nunits / group_size;
                  /* If we can't construct such a vector fall back to
                     element extracts from the original vector type and
                     element size stores.  */
                  if (mode_for_vector (elmode, lnunits).exists (&vmode)
                      && VECTOR_MODE_P (vmode)
                      && targetm.vector_mode_supported_p (vmode)
                      && (convert_optab_handler (vec_extract_optab,
                                                 vmode, elmode)
                          != CODE_FOR_nothing))
                    {
                      nstores = lnunits;
                      lnel = group_size;
                      ltype = build_nonstandard_integer_type (lsize, 1);
                      lvectype = build_vector_type (ltype, nstores);
                    }
                  /* Else fall back to vector extraction anyway.
                     Fewer stores are more important than avoiding spilling
                     of the vector we extract from.  Compared to the
                     construction case in vectorizable_load no store-forwarding
                     issue exists here for reasonable archs.  */
                }
            }
          else if (group_size >= const_nunits
                   && group_size % const_nunits == 0)
            {
              nstores = 1;
              lnel = const_nunits;
              ltype = vectype;
              lvectype = vectype;
            }
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      loop_vinfo->add_stmt (incr);

      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);

      prev_stmt_info = NULL;
      alias_off = build_int_cst (ref_type, 0);
      stmt_vec_info next_stmt_info = first_stmt_info;
      for (g = 0; g < group_size; g++)
        {
          running_off = offvar;
          if (g)
            {
              tree size = TYPE_SIZE_UNIT (ltype);
              tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
                                      size);
              tree newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, pos);
              vect_finish_stmt_generation (stmt_info, incr, gsi);
              running_off = newoff;
            }
          unsigned int group_el = 0;
          unsigned HOST_WIDE_INT
            elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
          for (j = 0; j < ncopies; j++)
            {
              /* We've set op and dt above, from vect_get_store_rhs,
                 and first_stmt_info == stmt_info.  */
              if (j == 0)
                {
                  if (slp)
                    {
                      vect_get_vec_defs (op, NULL_TREE, stmt_info,
                                         &vec_oprnds, NULL, slp_node);
                      vec_oprnd = vec_oprnds[0];
                    }
                  else
                    {
                      op = vect_get_store_rhs (next_stmt_info);
                      vec_oprnd = vect_get_vec_def_for_operand
                        (op, next_stmt_info);
                    }
                }
              else
                {
                  if (slp)
                    vec_oprnd = vec_oprnds[j];
                  else
                    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
                                                                vec_oprnd);
                }
              /* Pun the vector to extract from if necessary.  */
              if (lvectype != vectype)
                {
                  tree tem = make_ssa_name (lvectype);
                  gimple *pun
                    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                        lvectype, vec_oprnd));
                  vect_finish_stmt_generation (stmt_info, pun, gsi);
                  vec_oprnd = tem;
                }
              for (i = 0; i < nstores; i++)
                {
                  tree newref, newoff;
                  gimple *incr, *assign;
                  tree size = TYPE_SIZE (ltype);
                  /* Extract the i'th component.  */
                  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
                                          bitsize_int (i), size);
                  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
                                           size, pos);

                  elem = force_gimple_operand_gsi (gsi, elem, true,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);

                  tree this_off = build_int_cst (TREE_TYPE (alias_off),
                                                 group_el * elsz);
                  newref = build2 (MEM_REF, ltype,
                                   running_off, this_off);
                  vect_copy_ref_info (newref, DR_REF (first_dr));

                  /* And store it to *running_off.  */
                  assign = gimple_build_assign (newref, elem);
                  stmt_vec_info assign_info
                    = vect_finish_stmt_generation (stmt_info, assign, gsi);

                  group_el += lnel;
                  if (! slp
                      || group_el == group_size)
                    {
                      newoff = copy_ssa_name (running_off, NULL);
                      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                  running_off, stride_step);
                      vect_finish_stmt_generation (stmt_info, incr, gsi);

                      running_off = newoff;
                      group_el = 0;
                    }
                  if (g == group_size - 1
                      && !slp)
                    {
                      if (j == 0 && i == 0)
                        STMT_VINFO_VEC_STMT (stmt_info)
                          = *vec_stmt = assign_info;
                      else
                        STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
                      prev_stmt_info = assign_info;
                    }
                }
            }
          next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
          if (slp)
            break;
        }

      vec_oprnds.release ();
      return true;
    }
  auto_vec<tree> dr_chain (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
               && !mask
               && !loop_masks)
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (memory_access_type == VMAT_CONTIGUOUS_DOWN
      || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
                                       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
        aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
        aggr_type = vectype;
      bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
    }

  if (mask)
    LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
  prev_stmt_info = NULL;
  tree vec_mask = NULL_TREE;
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info;
      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
                                 NULL, slp_node);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              stmt_vec_info next_stmt_info = first_stmt_info;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     DR_GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
                     that there is no interleaving, DR_GROUP_SIZE is 1,
                     and only one iteration of the loop will be executed.  */
                  op = vect_get_store_rhs (next_stmt_info);
                  vec_oprnd = vect_get_vec_def_for_operand
                    (op, next_stmt_info);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
                }
              if (mask)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
                                                         mask_vectype);
            }
          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (TREE_TYPE (ref_type))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (ref_type, 0);
              inv_p = false;
            }
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
            {
              vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
                                           &dataref_ptr, &vec_offset);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p,
                                          NULL_TREE, bump);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (mask)
            vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset, bump);
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
            vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                           stmt_info, bump);
        }
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
        {
          tree vec_array;

          /* Get an array into which we can store the individual vectors.  */
          vec_array = create_vector_array (vectype, vec_num);

          /* Invalidate the current contents of VEC_ARRAY.  This should
             become an RTL clobber too, which prevents the vector registers
             from being upward-exposed.  */
          vect_clobber_variable (stmt_info, gsi, vec_array);

          /* Store the individual vectors into the array.  */
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
            }

          tree final_mask = NULL;
          if (loop_masks)
            final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
                                             vectype, j);
          if (vec_mask)
            final_mask = prepare_load_store_mask (mask_vectype, final_mask,
                                                  vec_mask, gsi);
          gcall *call;
          if (final_mask)
            {
              /* Emit:
                   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
                                     VEC_ARRAY).  */
              unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
              tree alias_ptr = build_int_cst (ref_type, align);
              call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
                                                 dataref_ptr, alias_ptr,
                                                 final_mask, vec_array);
            }
          else
            {
              /* Emit:
                   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
              data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
              call = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
              gimple_call_set_lhs (call, data_ref);
            }
          gimple_call_set_nothrow (call, true);
          new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);

          /* Record that VEC_ARRAY is now dead.  */
          vect_clobber_variable (stmt_info, gsi, vec_array);
        }
      else
        {
          new_stmt_info = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
                                        &result_chain);
            }

          stmt_vec_info next_stmt_info = first_stmt_info;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              tree final_mask = NULL_TREE;
              if (loop_masks)
                final_mask = vect_get_loop_mask (gsi, loop_masks,
                                                 vec_num * ncopies,
                                                 vectype, vec_num * j + i);
              if (vec_mask)
                final_mask = prepare_load_store_mask (mask_vectype, final_mask,
                                                      vec_mask, gsi);
              if (memory_access_type == VMAT_GATHER_SCATTER)
                {
                  tree scale = size_int (gs_info.scale);
                  gcall *call;
                  if (loop_masks)
                    call = gimple_build_call_internal
                      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
                       scale, vec_oprnd, final_mask);
                  else
                    call = gimple_build_call_internal
                      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
                       scale, vec_oprnd);
                  gimple_call_set_nothrow (call, true);
                  new_stmt_info
                    = vect_finish_stmt_generation (stmt_info, call, gsi);
                  break;
                }

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt_info, bump);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];
              align = DR_TARGET_ALIGNMENT (first_dr);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  align = dr_alignment (vect_dr_behavior (first_dr));
                  misalign = 0;
                }
              else
                misalign = DR_MISALIGNMENT (first_dr);
              if (dataref_offset == NULL_TREE
                  && TREE_CODE (dataref_ptr) == SSA_NAME)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest = vect_create_destination_var
                    (vect_get_store_rhs (stmt_info), vectype);
                  tree new_temp = make_ssa_name (perm_dest);

                  /* Generate the permute statement.  */
                  gimple *perm_stmt
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }
              /* Arguments are ready.  Create the new vector stmt.  */
              if (final_mask)
                {
                  align = least_bit_hwi (misalign | align);
                  tree ptr = build_int_cst (ref_type, align);
                  gcall *call
                    = gimple_build_call_internal (IFN_MASK_STORE, 4,
                                                  dataref_ptr, ptr,
                                                  final_mask, vec_oprnd);
                  gimple_call_set_nothrow (call, true);
                  new_stmt_info
                    = vect_finish_stmt_generation (stmt_info, call, gsi);
                }
              else
                {
                  data_ref = fold_build2 (MEM_REF, vectype,
                                          dataref_ptr,
                                          dataref_offset
                                          ? dataref_offset
                                          : build_int_cst (ref_type, 0));
                  if (aligned_access_p (first_dr))
                    ;
                  else if (DR_MISALIGNMENT (first_dr) == -1)
                    TREE_TYPE (data_ref)
                      = build_aligned_type (TREE_TYPE (data_ref),
                                            align * BITS_PER_UNIT);
                  else
                    TREE_TYPE (data_ref)
                      = build_aligned_type (TREE_TYPE (data_ref),
                                            TYPE_ALIGN (elem_type));
                  vect_copy_ref_info (data_ref, DR_REF (first_dr));
                  gassign *new_stmt
                    = gimple_build_assign (data_ref, vec_oprnd);
                  new_stmt_info
                    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                }

              if (slp)
                continue;

              next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
              if (!next_stmt_info)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
          prev_stmt_info = new_stmt_info;
        }
    }

  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   stmt_vec_info *vec_stmt, slp_tree slp_node,
                   slp_instance slp_node_instance,
                   stmt_vector_for_cost *cost_vec)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int i, j;
  unsigned int group_size;
  poly_uint64 group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  stmt_vec_info first_stmt_info;
  stmt_vec_info first_stmt_info_for_drptr = NULL;
  bool inv_p;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  poly_uint64 vf;
  tree aggr_type;
  gather_scatter_info gs_info;
  vec_info *vinfo = stmt_info->vinfo;
  tree ref_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      scalar_dest = gimple_assign_lhs (assign);
      if (TREE_CODE (scalar_dest) != SSA_NAME)
        return false;

      tree_code code = gimple_assign_rhs_code (assign);
      if (code != ARRAY_REF
          && code != BIT_FIELD_REF
          && code != INDIRECT_REF
          && code != COMPONENT_REF
          && code != IMAGPART_EXPR
          && code != REALPART_EXPR
          && code != MEM_REF
          && TREE_CODE_CLASS (code) != tcc_declaration)
        return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
        return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_load_fn_p (ifn))
        return false;

      scalar_dest = gimple_call_lhs (call);
      if (!scalar_dest)
        return false;

      if (slp_node != NULL)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "SLP of masked loads not supported.\n");
          return false;
        }

      int mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0)
        {
          mask = gimple_call_arg (call, mask_index);
          if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
                                           &mask_vectype))
            return false;
        }
    }

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
                   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);

      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        slp_perm = true;

      /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
          && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
          && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
                       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "cannot perform implicit CSE when performing "
                             "group loads with negative dependence distance\n");
          return false;
        }

      /* Similarly when the stmt is a load that is both part of a SLP
         instance and a loop vectorized stmt via the same-dr mechanism
         we have to give up.  */
      if (DR_GROUP_SAME_DR_STMT (stmt_info)
          && (STMT_SLP_TYPE (stmt_info)
              != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "conflicting SLP types for CSEd load\n");
          return false;
        }
    }
  else
    group_size = 1;
  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
                            &memory_access_type, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
        {
          machine_mode vec_mode = TYPE_MODE (vectype);
          if (!VECTOR_MODE_P (vec_mode)
              || !can_vec_mask_load_store_p (vec_mode,
                                             TYPE_MODE (mask_vectype), true))
            return false;
        }
      else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
        {
          tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
          tree masktype
            = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
          if (TREE_CODE (masktype) == INTEGER_TYPE)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "masked gather with integer mask not"
                                 " supported.");
              return false;
            }
        }
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
               && memory_access_type != VMAT_GATHER_SCATTER)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported access type for masked load.\n");
          return false;
        }
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      if (!slp)
        STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

      if (loop_vinfo
          && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
        check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
                                  memory_access_type, &gs_info);

      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, memory_access_type,
                            slp_node_instance, slp_node, cost_vec);
      return true;
    }

  if (!slp)
    gcc_assert (memory_access_type
                == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /* Transform.  */

  ensure_base_align (dr);

  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
      return true;
    }
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      tree stride_base, stride_step, alias_off;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();
      unsigned HOST_WIDE_INT cst_offset = 0;

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop);

      if (grouped_load)
        {
          first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
          first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
        }
      else
        {
          first_stmt_info = stmt_info;
          first_dr = dr;
        }
      if (slp && grouped_load)
        {
          group_size = DR_GROUP_SIZE (first_stmt_info);
          ref_type = get_group_alias_ptr_type (first_stmt_info);
        }
      else
        {
          if (grouped_load)
            cst_offset
              = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
                 * vect_get_place_in_interleaving_chain (stmt_info,
                                                         first_stmt_info));
          group_size = 1;
          ref_type = reference_alias_ptr_type (DR_REF (dr));
        }

      stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (DR_OFFSET (first_dr)),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...  */
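
      /* A concrete instance (illustrative numbers only): with a
         four-element vectype and stride == 3, one unrolled iteration
         gathers

           vectemp = {array[j], array[j+3], array[j+6], array[j+9]}

         via four scalar loads and a CONSTRUCTOR.  */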
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
                            build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      loop_vinfo->add_stmt (incr);

      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (ref_type, 0);
      int nloads = const_nunits;
      int lnel = 1;
      tree ltype = TREE_TYPE (vectype);
      tree lvectype = vectype;
      auto_vec<tree> dr_chain;
      if (memory_access_type == VMAT_STRIDED_SLP)
        {
          if (group_size < const_nunits)
            {
              /* First check if vec_init optab supports construction from
                 vector elts directly.  */
              scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
              machine_mode vmode;
              if (mode_for_vector (elmode, group_size).exists (&vmode)
                  && VECTOR_MODE_P (vmode)
                  && targetm.vector_mode_supported_p (vmode)
                  && (convert_optab_handler (vec_init_optab,
                                             TYPE_MODE (vectype), vmode)
                      != CODE_FOR_nothing))
                {
                  nloads = const_nunits / group_size;
                  lnel = group_size;
                  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
                }
              else
                {
                  /* Otherwise avoid emitting a constructor of vector elements
                     by performing the loads using an integer type of the same
                     size, constructing a vector of those and then
                     re-interpreting it as the original vector type.
                     This avoids a huge runtime penalty due to the general
                     inability to perform store forwarding from smaller stores
                     to a larger load.  */
                  unsigned lsize
                    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
                  elmode = int_mode_for_size (lsize, 0).require ();
                  unsigned int lnunits = const_nunits / group_size;
                  /* If we can't construct such a vector fall back to
                     element loads of the original vector type.  */
                  if (mode_for_vector (elmode, lnunits).exists (&vmode)
                      && VECTOR_MODE_P (vmode)
                      && targetm.vector_mode_supported_p (vmode)
                      && (convert_optab_handler (vec_init_optab, vmode, elmode)
                          != CODE_FOR_nothing))
                    {
                      nloads = lnunits;
                      lnel = group_size;
                      ltype = build_nonstandard_integer_type (lsize, 1);
                      lvectype = build_vector_type (ltype, nloads);
                    }
                }
            }
          else
            {
              nloads = 1;
              lnel = const_nunits;
              ltype = vectype;
            }
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
        }
      /* Load vector(1) scalar_type if it's 1 element-wise vectype.  */
      else if (nloads == 1)
        ltype = vectype;
      if (slp)
        {
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            {
              /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
                 variable VF.  */
              unsigned int const_vf = vf.to_constant ();
              ncopies = CEIL (group_size * const_vf, const_nunits);
              dr_chain.create (ncopies);
            }
          else
            ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
      unsigned int group_el = 0;
      unsigned HOST_WIDE_INT
        elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      for (j = 0; j < ncopies; j++)
        {
          if (nloads > 1)
            vec_alloc (v, nloads);
          stmt_vec_info new_stmt_info = NULL;
          for (i = 0; i < nloads; i++)
            {
              tree this_off = build_int_cst (TREE_TYPE (alias_off),
                                             group_el * elsz + cst_offset);
              tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
              vect_copy_ref_info (data_ref, DR_REF (first_dr));
              gassign *new_stmt
                = gimple_build_assign (make_ssa_name (ltype), data_ref);
              new_stmt_info
                = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
              if (nloads > 1)
                CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
                                        gimple_assign_lhs (new_stmt));

              group_el += lnel;
              if (! slp
                  || group_el == group_size)
                {
                  tree newoff = copy_ssa_name (running_off);
                  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                      running_off, stride_step);
                  vect_finish_stmt_generation (stmt_info, incr, gsi);

                  running_off = newoff;
                  group_el = 0;
                }
            }
          if (nloads > 1)
            {
              tree vec_inv = build_constructor (lvectype, v);
              new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
              new_stmt_info = vinfo->lookup_def (new_temp);
              if (lvectype != vectype)
                {
                  gassign *new_stmt
                    = gimple_build_assign (make_ssa_name (vectype),
                                           VIEW_CONVERT_EXPR,
                                           build1 (VIEW_CONVERT_EXPR,
                                                   vectype, new_temp));
                  new_stmt_info
                    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
                }
            }

          if (slp)
            {
              if (slp_perm)
                dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
              else
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
              prev_stmt_info = new_stmt_info;
            }
        }

      if (slp_perm)
        {
          unsigned n_perms;
          vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                        slp_node_instance, false, &n_perms);
        }
      return true;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER
      || (!slp && memory_access_type == VMAT_CONTIGUOUS))
    grouped_load = false;
  if (grouped_load)
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
      /* For BB vectorization always use the first stmt to base
	 the data ref pointer on.  */
      if (bb_vinfo)
	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (first_stmt_info)
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr = STMT_VINFO_DATA_REF (first_stmt_info);

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    {
	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF.  */
	      unsigned int const_vf = vf.to_constant ();
	      unsigned int const_nunits = nunits.to_constant ();
	      vec_num = CEIL (group_size * const_vf, const_nunits);
	      group_gap_adj = vf * group_size - nunits * vec_num;
	    }
	  else
	    {
	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      group_gap_adj
		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
	    }
	}
      else
	vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt_info);
    }
  else
    {
      first_stmt_info = stmt_info;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap_adj = 0;
      ref_type = reference_alias_ptr_type (DR_REF (first_dr));
    }
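  /* For instance, for an SLP permutation with GROUP_SIZE 3, VF 2 and
     NUNITS 4 this gives VEC_NUM = CEIL (3 * 2, 4) = 2 vector stmts and
     GROUP_GAP_ADJ = 2 * 3 - 4 * 2 = -2, i.e. two excess elements get
     loaded per copy and are compensated for by the pointer bumps below.  */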
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

       S1:  x2 = &base + 2
       S2:  x0 = &base
       S3:  x1 = &base + 1
       S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

       VS1: vx0 = &base
       VS2: vx1 = &base + vec_size*1
       VS3: vx3 = &base + vec_size*2
       VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

       VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
       VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
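  /* For example, with V4SI vectors and a group of two interleaved loads

       x = a[2*i];  y = a[2*i+1];

     the chain loads two adjacent vectors and the permutations pick out
     the even and the odd elements:

       vx0 = <a[0], a[1], a[2], a[3]>
       vx1 = <a[4], a[5], a[6], a[7]>
       vx  = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>
       vy  = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  */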
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
	 p2 = p2 + indx * vectype_size
	 lsq = *(floor(p2))
	 vec_dest = realign_load (msq, lsq, realignment_token)
	 indx = indx + 1;
	 msq = lsq;
       }   */
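  /* For example, with 16-byte vectors and a pointer P that is 4 bytes past
     a 16-byte boundary, the two aligned loads fetch the vectors at
     floor_16 (P) and floor_16 (P + 15), and realign_load extracts the 16
     bytes starting at P from their concatenation; in the optimized scheme
     the second aligned load of one iteration is reused as the first one of
     the next.  */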
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
  if (nested_in_vect_loop
      && !multiple_p (DR_STEP_ALIGNMENT (dr),
		      GET_MODE_SIZE (TYPE_MODE (vectype))))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	}
    }
  else
    at_loop = loop;

  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
				       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
	aggr_type = vectype;
      bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
    }

  tree vec_mask = NULL_TREE;
  prev_stmt_info = NULL;
  poly_uint64 group_elt = 0;
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info = NULL;
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (TREE_TYPE (ref_type)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (ref_type, 0);
	      inv_p = false;
	    }
	  else if (first_stmt_info_for_drptr
		   && first_stmt_info != first_stmt_info_for_drptr)
	    {
	      dataref_ptr
		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
					    aggr_type, at_loop, offset, &dummy,
					    gsi, &ptr_incr, simd_lane_access_p,
					    &inv_p, byte_offset, bump);
	      /* Adjust the pointer by the difference to first_stmt.  */
	      data_reference_p ptrdr
		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
	      tree diff = fold_convert (sizetype,
					size_binop (MINUS_EXPR,
						    DR_INIT (first_dr),
						    DR_INIT (ptrdr)));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt_info, diff);
	    }
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    {
	      vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
					   &dataref_ptr, &vec_offset);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  byte_offset, bump);
	  if (mask)
	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
						     mask_vectype);
	}
      else
	{
	  if (dataref_offset)
	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					      bump);
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
	  else
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					   stmt_info, bump);
	  if (mask)
	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
	}
      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  tree final_mask = NULL_TREE;
	  if (loop_masks)
	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
					     vectype, j);
	  if (vec_mask)
	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						  vec_mask, gsi);

	  gcall *call;
	  if (final_mask)
	    {
	      /* Emit:
		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
						VEC_MASK).  */
	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
	      tree alias_ptr = build_int_cst (ref_type, align);
	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
						 dataref_ptr, alias_ptr,
						 final_mask);
	    }
	  else
	    {
	      /* Emit:
		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	    }
	  gimple_call_set_lhs (call, vec_array);
	  gimple_call_set_nothrow (call, true);
	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt_info, dr_chain);

	  /* Record that VEC_ARRAY is now dead.  */
	  vect_clobber_variable (stmt_info, gsi, vec_array);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      tree final_mask = NULL_TREE;
	      if (loop_masks
		  && memory_access_type != VMAT_INVARIANT)
		final_mask = vect_get_loop_mask (gsi, loop_masks,
						 vec_num * ncopies,
						 vectype, vec_num * j + i);
	      if (vec_mask)
		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						      vec_mask, gsi);

	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt_info, bump);

	      /* 2. Create the vector-load in the loop.  */
	      gimple *new_stmt = NULL;
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    if (memory_access_type == VMAT_GATHER_SCATTER)
		      {
			tree scale = size_int (gs_info.scale);
			gcall *call;
			if (loop_masks)
			  call = gimple_build_call_internal
			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
			     vec_offset, scale, final_mask);
			else
			  call = gimple_build_call_internal
			    (IFN_GATHER_LOAD, 3, dataref_ptr,
			     vec_offset, scale);
			gimple_call_set_nothrow (call, true);
			new_stmt = call;
			data_ref = NULL_TREE;
			break;
		      }

		    align = DR_TARGET_ALIGNMENT (dr);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			align = dr_alignment (vect_dr_behavior (first_dr));
			misalign = 0;
		      }
		    else
		      misalign = DR_MISALIGNMENT (first_dr);
		    if (dataref_offset == NULL_TREE
			&& TREE_CODE (dataref_ptr) == SSA_NAME)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);

		    if (final_mask)
		      {
			align = least_bit_hwi (misalign | align);
			tree ptr = build_int_cst (ref_type, align);
			gcall *call
			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
							dataref_ptr, ptr,
							final_mask);
			gimple_call_set_nothrow (call, true);
			new_stmt = call;
			data_ref = NULL_TREE;
		      }
		    else
		      {
			data_ref
			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
					 dataref_offset
					 ? dataref_offset
					 : build_int_cst (ref_type, 0));
			if (alignment_support_scheme == dr_aligned)
			  ;
			else if (DR_MISALIGNMENT (first_dr) == -1)
			  TREE_TYPE (data_ref)
			    = build_aligned_type (TREE_TYPE (data_ref),
						  align * BITS_PER_UNIT);
			else
			  TREE_TYPE (data_ref)
			    = build_aligned_type (TREE_TYPE (data_ref),
						  TYPE_ALIGN (elem_type));
		      }
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;

		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt_info, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      ptr = copy_ssa_name (dataref_ptr);
		    else
		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
		    new_stmt = gimple_build_assign
				 (ptr, BIT_AND_EXPR, dataref_ptr,
				  build_int_cst
				    (TREE_TYPE (dataref_ptr),
				     -(HOST_WIDE_INT) align));
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (ref_type, 0));
		    vect_copy_ref_info (data_ref, DR_REF (first_dr));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs,
				       TYPE_SIZE_UNIT (elem_type));
		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
					   stmt_info, bump);
		    new_stmt = gimple_build_assign
				 (NULL_TREE, BIT_AND_EXPR, ptr,
				  build_int_cst
				    (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
		    ptr = copy_ssa_name (ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (ref_type, 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  {
		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      new_temp = copy_ssa_name (dataref_ptr);
		    else
		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
		    new_stmt = gimple_build_assign
		      (new_temp, BIT_AND_EXPR, dataref_ptr,
		       build_int_cst (TREE_TYPE (dataref_ptr),
				      -(HOST_WIDE_INT) align));
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, new_temp,
				build_int_cst (ref_type, 0));
		    break;
		  }
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      /* DATA_REF is null if we've already built the statement.  */
	      if (data_ref)
		{
		  vect_copy_ref_info (data_ref, DR_REF (first_dr));
		  new_stmt = gimple_build_assign (vec_dest, data_ref);
		}
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}
	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gcc_assert (!grouped_load);
		  /* If we have versioned for aliasing or the loop doesn't
		     have any data dependencies that would preclude this,
		     then we are sure this is a loop invariant load and
		     thus we can insert it on the preheader edge.  */
		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt_info, loop))
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "hoisting out of the vectorized "
					   "loop: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
			}
		      tree tem = copy_ssa_name (scalar_dest);
		      gsi_insert_on_edge_immediate
			(loop_preheader_edge (loop),
			 gimple_build_assign (tem,
					      unshare_expr
						(gimple_assign_rhs1 (stmt))));
		      new_temp = vect_init_vector (stmt_info, tem,
						   vectype, NULL);
		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
		      new_stmt_info = vinfo->add_stmt (new_stmt);
		    }
		  else
		    {
		      gimple_stmt_iterator gsi2 = *gsi;
		      gsi_next (&gsi2);
		      new_temp = vect_init_vector (stmt_info, scalar_dest,
						   vectype, &gsi2);
		      new_stmt_info = vinfo->lookup_def (new_temp);
		    }
		}
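	      /* E.g. a load of an unchanging *q used inside the loop is
		 emitted just once on the preheader edge and broadcast into
		 a vector there, instead of being re-loaded and splatted in
		 every vector iteration.  */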
	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt_info, gsi);
		  new_stmt_info = vinfo->lookup_def (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);

	      /* With SLP permutation we load the gaps as well; without it
		 we need to skip the gaps after we manage to fully load
		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
	      group_elt += nunits;
	      if (maybe_ne (group_gap_adj, 0U)
		  && !slp_perm
		  && known_eq (group_elt, group_size - group_gap_adj))
		{
		  poly_wide_int bump_val
		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
		       * group_gap_adj);
		  tree bump = wide_int_to_tree (sizetype, bump_val);
		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
						 stmt_info, bump);
		  group_elt = 0;
		}
	    }
	  /* Bump the vector pointer to account for a gap or for excess
	     elements loaded for a permuted SLP load.  */
	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
	    {
	      poly_wide_int bump_val
		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
		   * group_gap_adj);
	      tree bump = wide_int_to_tree (sizetype, bump_val);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt_info, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  unsigned n_perms;
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false,
					     &n_perms))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
		vect_transform_grouped_load (stmt_info, dr_chain,
					     group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	      prev_stmt_info = new_stmt_info;
	    }
	}
      dr_chain.release ();
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo,
		     tree *comp_vectype, enum vect_def_type *dts,
		     tree vectype)
{
  tree lhs, rhs;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
    {
      if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
	  || !*comp_vectype
	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
	return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
	   || TREE_CODE (lhs) == FIXED_CST)
    dts[0] = vect_constant_def;
  else
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
	   || TREE_CODE (rhs) == FIXED_CST)
    dts[1] = vect_constant_def;
  else
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison.  */
  if (! *comp_vectype && vectype)
    {
      tree scalar_type = TREE_TYPE (lhs);
      /* If we can widen the comparison to match vectype do so.  */
      if (INTEGRAL_TYPE_P (scalar_type)
	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
			      TYPE_SIZE (TREE_TYPE (vectype))))
	scalar_type = build_nonstandard_integer_type
	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
	   TYPE_UNSIGNED (scalar_type));
      *comp_vectype = get_vectype_for_scalar_type (scalar_type);
    }

  return true;
}
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create a
   vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT,
   and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
   variable to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1,
   and in else clause if it is 2).

   Return true if STMT_INFO is vectorizable in this way.  */

bool
vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			stmt_vec_info *vec_stmt, tree reduc_def,
			int reduc_index, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  vect_reduction_type reduction_type
    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
  if (reduction_type == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
	return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	       && reduc_def))
	return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "value used after loop.\n");
	  return false;
	}
    }

  /* Is vectorizable conditional operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
      || !comp_vectype)
    return false;

  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  cond_code = TREE_CODE (cond_expr);
  if (!masked)
    {
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);
    }

  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
    {
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
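      /* For example, a mask comparison a > b becomes a & ~b, a >= b
	 becomes a | ~b, a == b becomes ~(a ^ b), and a != b is just
	 a ^ b; for a < b and a <= b the operands are swapped first.  */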
      switch (cond_code)
	{
	case GT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  break;
	case GE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  break;
	case LT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case LE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case NE_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  break;
	case EQ_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
	  break;
	default:
	  return false;
	}
      cond_code = SSA_NAME;
    }

  if (!vec_stmt)
    {
      if (bitop1 != NOP_EXPR)
	{
	  machine_mode mode = TYPE_MODE (comp_vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, comp_vectype,
					   optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}
      if (expand_vec_cond_expr_p (vectype, comp_vectype,
				  cond_code))
	{
	  STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
				  cost_vec);
	  return true;
	}
      return false;
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  if (reduction_type != EXTRACT_LAST_REDUCTION)
    vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      if (masked)
		ops.safe_push (cond_expr);
	      else
		{
		  ops.safe_push (cond_expr0);
		  ops.safe_push (cond_expr1);
		}
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      if (!masked)
		vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      if (masked)
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr, stmt_info,
						    comp_vectype);
		  vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
		}
	      else
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr0,
						    stmt_info, comp_vectype);
		  vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);

		  vec_cond_rhs
		    = vect_get_vec_def_for_operand (cond_expr1,
						    stmt_info, comp_vectype);
		  vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
		}
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause
		    = vect_get_vec_def_for_operand (then_clause, stmt_info);
		  vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause
		    = vect_get_vec_def_for_operand (else_clause, stmt_info);
		  vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs
	    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
	  if (!masked)
	    vec_cond_rhs
	      = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());

	  vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  if (!masked)
	    vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  if (masked)
	    vec_compare = vec_cond_lhs;
	  else
	    {
	      vec_cond_rhs = vec_oprnds1[i];
	      if (bitop1 == NOP_EXPR)
		vec_compare = build2 (cond_code, vec_cmp_type,
				      vec_cond_lhs, vec_cond_rhs);
	      else
		{
		  new_temp = make_ssa_name (vec_cmp_type);
		  gassign *new_stmt;
		  if (bitop1 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (new_temp, bitop1,
						    vec_cond_rhs);
		  else
		    new_stmt
		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
					     vec_cond_rhs);
		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  if (bitop2 == NOP_EXPR)
		    vec_compare = new_temp;
		  else if (bitop2 == BIT_NOT_EXPR)
		    {
		      /* Instead of doing ~x ? y : z do x ? z : y.  */
		      vec_compare = new_temp;
		      std::swap (vec_then_clause, vec_else_clause);
		    }
		  else
		    {
		      vec_compare = make_ssa_name (vec_cmp_type);
		      new_stmt
			= gimple_build_assign (vec_compare, bitop2,
					       vec_cond_lhs, new_temp);
		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    }
		}
	    }
	  if (reduction_type == EXTRACT_LAST_REDUCTION)
	    {
	      if (!is_gimple_val (vec_compare))
		{
		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
							   vec_compare);
		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  vec_compare = vec_compare_name;
		}
	      gcc_assert (reduc_index == 2);
	      gcall *new_stmt = gimple_build_call_internal
		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
		 vec_then_clause);
	      gimple_call_set_lhs (new_stmt, scalar_dest);
	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
	      if (stmt_info->stmt == gsi_stmt (*gsi))
		new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
	      else
		{
		  /* In this case we're moving the definition to later in the
		     block.  That doesn't matter because the only uses of the
		     lhs are in phi statements.  */
		  gimple_stmt_iterator old_gsi
		    = gsi_for_stmt (stmt_info->stmt);
		  gsi_remove (&old_gsi, true);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		}
	    }
	  else
	    {
	      new_temp = make_ssa_name (vec_dest);
	      gassign *new_stmt
		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
				       vec_then_clause, vec_else_clause);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, tree reduc_def,
			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree lhs, rhs1, rhs2;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  poly_uint64 nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
	return false;
    }
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  if (!vec_stmt)
    {
      if (bitop1 == NOP_EXPR)
	{
	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
	    return false;
	}
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}

      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 2> ops;
	      auto_vec<vec<tree>, 2> vec_defs;

	      ops.safe_push (rhs1);
	      ops.safe_push (rhs2);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
						       vectype);
	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
						       vectype);
	    }
	}
      else
	{
	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
						     vec_oprnds0.pop ());
	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
						     vec_oprnds1.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_rhs1);
	  vec_oprnds1.quick_push (vec_rhs2);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
	{
	  vec_rhs2 = vec_oprnds1[i];

	  new_temp = make_ssa_name (mask);
	  if (bitop1 == NOP_EXPR)
	    {
	      gassign *new_stmt = gimple_build_assign (new_temp, code,
						       vec_rhs1, vec_rhs2);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  else
	    {
	      gassign *new_stmt;
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	      else
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
						vec_rhs2);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      if (bitop2 != NOP_EXPR)
		{
		  tree res = make_ssa_name (mask);
		  if (bitop2 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
		  else
		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						    new_temp);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		}
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

static bool
can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, stmt_vec_info *vec_stmt,
			  stmt_vector_for_cost *cost_vec)
{
  if (slp_node)
    {
      stmt_vec_info slp_stmt_info;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
	{
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
					       vec_stmt, cost_vec))
	    return false;
	}
    }
  else if (STMT_VINFO_LIVE_P (stmt_info)
	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
					    vec_stmt, cost_vec))
    return false;

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
		   slp_tree node, slp_instance node_instance,
		   stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt_info->stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info pattern_def_stmt_info
	    = vinfo->lookup_stmt (gsi_stmt (si));
	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
				    pattern_def_stmt_info->stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt_info,
				      need_to_vectorize, node, node_instance,
				      cost_vec))
		return false;
	    }
	}
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */
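  /* For instance, in a loop computing a[i] = b[i] + 1 the induction
     increment i_2 = i_1 + 1 and the exit test only index the arrays and
     control the loop, so they are marked irrelevant and skipped here.  */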
  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt_info
	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt_info = pattern_stmt_info;
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt_info
	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt_info->stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
			      node_instance, cost_vec))
	return false;
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      tree type = gimple_expr_type (stmt_info->stmt);
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
				cost_vec)
	  || vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_reduction (stmt_info, NULL, NULL, node,
				     node_instance, cost_vec)
	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
				     cost_vec)
	  || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
				      cost_vec));
  else if (bb_vinfo)
    ok = (vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
					cost_vec)
	  || vectorizable_conversion (stmt_info, NULL, NULL, node,
				      cost_vec)
	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (stmt_info, NULL, NULL, node,
				      cost_vec)
	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
				cost_vec)
	  || vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
				     cost_vec)
	  || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
				      cost_vec));

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			    stmt_info->stmt, 0);
	}

      return false;
    }

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			    stmt_info->stmt, 0);
	}

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */

bool
vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  vec_info *vinfo = stmt_info->vinfo;
  bool is_store = false;
  stmt_vec_info vec_stmt = NULL;
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);

  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
		   && nested_in_vect_loop_p
		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
			 stmt_info));

  gimple *stmt = stmt_info->stmt;
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
				      NULL);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
				     NULL);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
				     NULL);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
				      NULL);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
				slp_node_instance, NULL);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, NULL, 0,
				     slp_node, NULL);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt_info, gsi, &vec_stmt, NULL,
				      slp_node, NULL);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      stmt = gsi_stmt (*gsi);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
					   slp_node, NULL);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
				     slp_node_instance, NULL);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && nested_p
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
	STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	  {
	    stmt_vec_info exit_phi_info
	      = vinfo->lookup_stmt (USE_STMT (use_p));
	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
	  }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
				       NULL);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  gimple_stmt_iterator next_si;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (is_pattern_stmt_p (next_stmt_info))
	next_stmt_info = STMT_VINFO_RELATED_STMT (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next_stmt_info->stmt);
      unlink_stmt_vdef (next_stmt_info->stmt);
      gsi_remove (&next_si, true);
      release_defs (next_stmt_info->stmt);
      free_stmt_vec_info (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (_stmt_vec_info *) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
  res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
  res->size = 0; /* GROUP_SIZE */
  res->store_count = 0; /* GROUP_STORE_COUNT */
  res->gap = 0; /* GROUP_GAP */
  res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}
/* Set the current stmt_vec_info vector to V.  */

void
set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
{
  stmt_vec_info_vec = v;
}

/* Free the stmt_vec_info entries in V and release V.  */

void
free_stmt_vec_infos (vec<stmt_vec_info> *v)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (*v, i, info)
    if (info != NULL_STMT_VEC_INFO)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  if (v == stmt_vec_info_vec)
    stmt_vec_info_vec = NULL;
  v->release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
	for (gimple_stmt_iterator si = gsi_start (seq);
	     !gsi_end_p (si); gsi_next (&si))
	  {
	    gimple *seq_stmt = gsi_stmt (si);
	    gimple_set_bb (seq_stmt, NULL);
	    tree lhs = gimple_get_lhs (seq_stmt);
	    if (lhs && TREE_CODE (lhs) == SSA_NAME)
	      release_ssa_name (lhs);
	    free_stmt_vec_info (seq_stmt);
	  }
      stmt_vec_info patt_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      if (patt_stmt_info)
	{
	  gimple_set_bb (patt_stmt_info->stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt_info->stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  free_stmt_vec_info (patt_stmt_info);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
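/* For example, for a 4-byte int SCALAR_TYPE and SIZE 16 this yields a
   four-element integer vector type (V4SImode on most targets); with a
   SIZE of zero the target's preferred SIMD mode for the element mode
   determines the number of units.  */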
10048 poly_uint64 current_vector_size
;
10050 /* Function get_vectype_for_scalar_type.
10052 Returns the vector type corresponding to SCALAR_TYPE as supported
10056 get_vectype_for_scalar_type (tree scalar_type
)
10059 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
10060 current_vector_size
);
10062 && known_eq (current_vector_size
, 0U))
10063 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
10067 /* Function get_mask_type_for_scalar_type.
10069 Returns the mask type corresponding to a result of comparison
10070 of vectors of specified SCALAR_TYPE as supported by target. */
10073 get_mask_type_for_scalar_type (tree scalar_type
)
10075 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
10080 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
10081 current_vector_size
);
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same
   size as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
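/* Illustrative sketch (not part of GCC): conversion code uses this to
   pair input and output vector types of equal byte size, e.g. for
   widening short -> int with 16-byte vectors:

     tree vec_int = get_vectype_for_scalar_type (integer_type_node);
     tree vec_short = get_same_sized_vectype (short_integer_type_node,
					      vec_int);
     // vec_int is 4 x int, vec_short is 8 x short; both 16 bytes.  */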
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
	    {
	      stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
	      def_stmt = stmt_vinfo->stmt;
	    }
	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	    case GIMPLE_CALL:
	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	      break;
	    default:
	      *dt = vect_unknown_def_type;
	      break;
	    }
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_is_simple_use: vectype ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
	  dump_printf (MSG_NOTE, "\n");
	}
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
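/* Illustrative usage sketch (not part of GCC, and assuming the
   trailing stmt-info/stmt arguments are defaulted as in the four-arg
   calls elsewhere in this file): a typical caller pattern for this
   overload, here for the first operand of an assignment STMT:

     enum vect_def_type dt;
     tree op_vectype;
     if (!vect_is_simple_use (gimple_assign_rhs1 (stmt), vinfo, &dt,
			      &op_vectype))
       return false;   // operand defined by an unsupported pattern
     // op_vectype may still be NULL_TREE for constant/external defs;
     // the caller then picks a vector type suited to the use site.  */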
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
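/* Illustrative sketch (not part of GCC): with START_INDEX = 13,
   END_INDEX = 16 and an 8-element mask, the generated WHILE_ULT call
   produces

     mask = { 1, 1, 1, 0, 0, 0, 0, 0 }

   since I + 13 < 16 holds only for I <= 2.  This is how fully-masked
   loops (e.g. on SVE-style targets) deactivate the excess lanes of a
   final partial iteration.  */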
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - boolean_type_node if the statement is a boolean operation whose
       vector type can only be determined once all the other vector types
       are known; and
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

bool
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out)
{
  gimple *stmt = stmt_info->stmt;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     they really need can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return true;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: irregular stmt.");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}
      return false;
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: vector stmt in loop:");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}
      return false;
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
  else
    {
      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
	 For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
	  && is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
	{
	  *stmt_vectype_out = boolean_type_node;

	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    scalar_type = TREE_TYPE (rhs1);
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "pure bool operation.\n");
	      return true;
	    }
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (!*stmt_vectype_out)
	*stmt_vectype_out = vectype;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
  else
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
	{
	  HOST_WIDE_INT dummy;
	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
						       &dummy, &dummy);
	}
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
    }
  if (!nunits_vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: different sized vector "
			   "types in statement, ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     nunits_vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
      dump_printf (MSG_NOTE, "\n");

      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return true;
}
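/* Illustrative sketch (not part of GCC): for a widening statement
   such as

     int_val = (int) char_val;   // inside a vectorized loop body

   *STMT_VECTYPE_OUT is based on the lhs type (a vector of int), while
   *NUNITS_VECTYPE_OUT is based on the smallest scalar type accessed
   (a vector of char), which has more units and therefore determines
   the vectorization factor.  */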
/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

tree
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
{
  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
    {
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

      if (!mask_type)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: unsupported mask\n");
	  return NULL_TREE;
	}
    }
  else
    {
      tree rhs;
      ssa_op_iter iter;
      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	{
	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: can't compute mask type "
				   "for statement, ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
				    0);
		}
	      return NULL_TREE;
	    }

	  /* No vectype probably means external definition.
	     Allow it in case there is another operand which
	     allows us to determine the mask type.  */
	  if (!vectype)
	    continue;

	  if (!mask_type)
	    mask_type = vectype;
	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
			     TYPE_VECTOR_SUBPARTS (vectype)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: different sized mask "
				   "types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     mask_type);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return NULL_TREE;
	    }
	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		   != VECTOR_BOOLEAN_TYPE_P (vectype))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: mixed mask and "
				   "nonmask vector types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     mask_type);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return NULL_TREE;
	    }
	}

      /* We may compare boolean values loaded as a vector of integers.
	 Fix mask_type in such a case.  */
      if (mask_type
	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	  && gimple_code (stmt) == GIMPLE_ASSIGN
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	mask_type = build_same_sized_truth_vector_type (mask_type);
    }

  /* A null mask_type should mean a loop-invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
  if (!mask_type && dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "not vectorized: can't compute mask type "
		       "for statement, ");
      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
    }
  return mask_type;
}