1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 int misalign
, enum vect_cost_model_location where
)
97 if ((kind
== vector_load
|| kind
== unaligned_load
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_gather_load
;
100 if ((kind
== vector_store
|| kind
== unaligned_store
)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
102 kind
= vector_scatter_store
;
104 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
105 body_cost_vec
->safe_push (si
);
107 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
784 } /* while worklist */
786 return opt_result::success ();
789 /* Compute the prologue cost for invariant or constant operands. */
792 vect_prologue_cost_for_slp_op (vec_info
*vinfo
,
793 slp_tree node
, stmt_vec_info stmt_info
,
794 unsigned opno
, enum vect_def_type dt
,
795 stmt_vector_for_cost
*cost_vec
)
797 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
798 tree op
= gimple_op (stmt
, opno
);
799 unsigned prologue_cost
= 0;
801 /* Without looking at the actual initializer a vector of
802 constants can be implemented as load from the constant pool.
803 When all elements are the same we can use a splat. */
804 tree vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), node
);
805 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
806 unsigned num_vects_to_check
;
807 unsigned HOST_WIDE_INT const_nunits
;
809 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
810 && ! multiple_p (const_nunits
, group_size
))
812 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
813 nelt_limit
= const_nunits
;
817 /* If either the vector has variable length or the vectors
818 are composed of repeated whole groups we only need to
819 cost construction once. All vectors will be the same. */
820 num_vects_to_check
= 1;
821 nelt_limit
= group_size
;
823 tree elt
= NULL_TREE
;
825 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
827 unsigned si
= j
% group_size
;
829 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
830 /* ??? We're just tracking whether all operands of a single
831 vector initializer are the same, ideally we'd check if
832 we emitted the same one already. */
833 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
837 if (nelt
== nelt_limit
)
839 /* ??? We need to pass down stmt_info for a vector type
840 even if it points to the wrong stmt. */
841 prologue_cost
+= record_stmt_cost
843 dt
== vect_external_def
844 ? (elt
? scalar_to_vec
: vec_construct
)
846 stmt_info
, 0, vect_prologue
);
851 return prologue_cost
;
854 /* Function vect_model_simple_cost.
856 Models cost for simple operations, i.e. those that only emit ncopies of a
857 single op. Right now, this does not account for multiple insns that could
858 be generated for the single vector op. We will handle that shortly. */
861 vect_model_simple_cost (vec_info
*vinfo
,
862 stmt_vec_info stmt_info
, int ncopies
,
863 enum vect_def_type
*dt
,
866 stmt_vector_for_cost
*cost_vec
,
867 vect_cost_for_stmt kind
= vector_stmt
)
869 int inside_cost
= 0, prologue_cost
= 0;
871 gcc_assert (cost_vec
!= NULL
);
873 /* ??? Somehow we need to fix this at the callers. */
875 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
879 /* Scan operands and account for prologue cost of constants/externals.
880 ??? This over-estimates cost for multiple uses and should be
882 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
883 tree lhs
= gimple_get_lhs (stmt
);
884 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
886 tree op
= gimple_op (stmt
, i
);
887 enum vect_def_type dt
;
888 if (!op
|| op
== lhs
)
890 if (vect_is_simple_use (op
, vinfo
, &dt
)
891 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
892 prologue_cost
+= vect_prologue_cost_for_slp_op (vinfo
, node
,
898 /* Cost the "broadcast" of a scalar operand in to a vector operand.
899 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
901 for (int i
= 0; i
< ndts
; i
++)
902 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
903 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
904 stmt_info
, 0, vect_prologue
);
906 /* Adjust for two-operator SLP nodes. */
907 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
910 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
911 stmt_info
, 0, vect_body
);
914 /* Pass the inside-of-loop statements to the target-specific cost model. */
915 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
916 stmt_info
, 0, vect_body
);
918 if (dump_enabled_p ())
919 dump_printf_loc (MSG_NOTE
, vect_location
,
920 "vect_model_simple_cost: inside_cost = %d, "
921 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
925 /* Model cost for type demotion and promotion operations. PWR is
926 normally zero for single-step promotions and demotions. It will be
927 one if two-step promotion/demotion is required, and so on. NCOPIES
928 is the number of vector results (and thus number of instructions)
929 for the narrowest end of the operation chain. Each additional
930 step doubles the number of instructions required. */
933 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
934 enum vect_def_type
*dt
,
935 unsigned int ncopies
, int pwr
,
936 stmt_vector_for_cost
*cost_vec
)
939 int inside_cost
= 0, prologue_cost
= 0;
941 for (i
= 0; i
< pwr
+ 1; i
++)
943 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
944 stmt_info
, 0, vect_body
);
948 /* FORNOW: Assuming maximum 2 args per stmts. */
949 for (i
= 0; i
< 2; i
++)
950 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
951 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
952 stmt_info
, 0, vect_prologue
);
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_NOTE
, vect_location
,
956 "vect_model_promotion_demotion_cost: inside_cost = %d, "
957 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
960 /* Returns true if the current function returns DECL. */
963 cfun_returns (tree decl
)
967 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
969 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
972 if (gimple_return_retval (ret
) == decl
)
974 /* We often end up with an aggregate copy to the result decl,
975 handle that case as well. First skip intermediate clobbers
980 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
982 while (gimple_clobber_p (def
));
983 if (is_a
<gassign
*> (def
)
984 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
985 && gimple_assign_rhs1 (def
) == decl
)
991 /* Function vect_model_store_cost
993 Models cost for stores. In the case of grouped accesses, one access
994 has the overhead of the grouped access attributed to it. */
997 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
998 enum vect_def_type dt
,
999 vect_memory_access_type memory_access_type
,
1000 vec_load_store_type vls_type
, slp_tree slp_node
,
1001 stmt_vector_for_cost
*cost_vec
)
1003 unsigned int inside_cost
= 0, prologue_cost
= 0;
1004 stmt_vec_info first_stmt_info
= stmt_info
;
1005 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1007 /* ??? Somehow we need to fix this at the callers. */
1009 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1011 if (vls_type
== VLS_STORE_INVARIANT
)
1014 prologue_cost
+= vect_prologue_cost_for_slp_op (vinfo
, slp_node
,
1018 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
1019 stmt_info
, 0, vect_prologue
);
1022 /* Grouped stores update all elements in the group at once,
1023 so we want the DR for the first statement. */
1024 if (!slp_node
&& grouped_access_p
)
1025 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1027 /* True if we should include any once-per-group costs as well as
1028 the cost of the statement itself. For SLP we only get called
1029 once per group anyhow. */
1030 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1032 /* We assume that the cost of a single store-lanes instruction is
1033 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1034 access is instead being provided by a permute-and-store operation,
1035 include the cost of the permutes. */
1037 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1039 /* Uses a high and low interleave or shuffle operations for each
1041 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1042 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1043 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1044 stmt_info
, 0, vect_body
);
1046 if (dump_enabled_p ())
1047 dump_printf_loc (MSG_NOTE
, vect_location
,
1048 "vect_model_store_cost: strided group_size = %d .\n",
1052 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1053 /* Costs of the stores. */
1054 if (memory_access_type
== VMAT_ELEMENTWISE
1055 || memory_access_type
== VMAT_GATHER_SCATTER
)
1057 /* N scalar stores plus extracting the elements. */
1058 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1059 inside_cost
+= record_stmt_cost (cost_vec
,
1060 ncopies
* assumed_nunits
,
1061 scalar_store
, stmt_info
, 0, vect_body
);
1064 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1066 if (memory_access_type
== VMAT_ELEMENTWISE
1067 || memory_access_type
== VMAT_STRIDED_SLP
)
1069 /* N scalar stores plus extracting the elements. */
1070 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1071 inside_cost
+= record_stmt_cost (cost_vec
,
1072 ncopies
* assumed_nunits
,
1073 vec_to_scalar
, stmt_info
, 0, vect_body
);
1076 /* When vectorizing a store into the function result assign
1077 a penalty if the function returns in a multi-register location.
1078 In this case we assume we'll end up with having to spill the
1079 vector result and do piecewise loads as a conservative estimate. */
1080 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1082 && (TREE_CODE (base
) == RESULT_DECL
1083 || (DECL_P (base
) && cfun_returns (base
)))
1084 && !aggregate_value_p (base
, cfun
->decl
))
1086 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1087 /* ??? Handle PARALLEL in some way. */
1090 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1091 /* Assume that a single reg-reg move is possible and cheap,
1092 do not account for vector to gp register move cost. */
1096 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1098 stmt_info
, 0, vect_epilogue
);
1100 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1102 stmt_info
, 0, vect_epilogue
);
1107 if (dump_enabled_p ())
1108 dump_printf_loc (MSG_NOTE
, vect_location
,
1109 "vect_model_store_cost: inside_cost = %d, "
1110 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1114 /* Calculate cost of DR's memory access. */
1116 vect_get_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1117 unsigned int *inside_cost
,
1118 stmt_vector_for_cost
*body_cost_vec
)
1120 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1121 int alignment_support_scheme
1122 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1124 switch (alignment_support_scheme
)
1128 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1129 vector_store
, stmt_info
, 0,
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE
, vect_location
,
1134 "vect_model_store_cost: aligned.\n");
1138 case dr_unaligned_supported
:
1140 /* Here, we assign an additional cost for the unaligned store. */
1141 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1142 unaligned_store
, stmt_info
,
1143 DR_MISALIGNMENT (dr_info
),
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_store_cost: unaligned supported by "
1152 case dr_unaligned_unsupported
:
1154 *inside_cost
= VECT_MAX_COST
;
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1158 "vect_model_store_cost: unsupported access.\n");
1168 /* Function vect_model_load_cost
1170 Models cost for loads. In the case of grouped accesses, one access has
1171 the overhead of the grouped access attributed to it. Since unaligned
1172 accesses are supported for loads, we also account for the costs of the
1173 access scheme chosen. */
1176 vect_model_load_cost (vec_info
*vinfo
,
1177 stmt_vec_info stmt_info
, unsigned ncopies
,
1178 vect_memory_access_type memory_access_type
,
1179 slp_instance instance
,
1181 stmt_vector_for_cost
*cost_vec
)
1183 unsigned int inside_cost
= 0, prologue_cost
= 0;
1184 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1186 gcc_assert (cost_vec
);
1188 /* ??? Somehow we need to fix this at the callers. */
1190 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1192 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1194 /* If the load is permuted then the alignment is determined by
1195 the first group element not by the first scalar stmt DR. */
1196 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1197 /* Record the cost for the permutation. */
1199 unsigned assumed_nunits
1200 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1201 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
1202 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1203 slp_vf
, instance
, true,
1205 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1206 first_stmt_info
, 0, vect_body
);
1207 /* And adjust the number of loads performed. This handles
1208 redundancies as well as loads that are later dead. */
1209 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1210 bitmap_clear (perm
);
1211 for (unsigned i
= 0;
1212 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1213 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1215 bool load_seen
= false;
1216 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1218 if (i
% assumed_nunits
== 0)
1224 if (bitmap_bit_p (perm
, i
))
1230 <= (DR_GROUP_SIZE (first_stmt_info
)
1231 - DR_GROUP_GAP (first_stmt_info
)
1232 + assumed_nunits
- 1) / assumed_nunits
);
1235 /* Grouped loads read all elements in the group at once,
1236 so we want the DR for the first statement. */
1237 stmt_vec_info first_stmt_info
= stmt_info
;
1238 if (!slp_node
&& grouped_access_p
)
1239 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1241 /* True if we should include any once-per-group costs as well as
1242 the cost of the statement itself. For SLP we only get called
1243 once per group anyhow. */
1244 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1246 /* We assume that the cost of a single load-lanes instruction is
1247 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1248 access is instead being provided by a load-and-permute operation,
1249 include the cost of the permutes. */
1251 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1253 /* Uses an even and odd extract operations or shuffle operations
1254 for each needed permute. */
1255 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1256 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1257 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1258 stmt_info
, 0, vect_body
);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "vect_model_load_cost: strided group_size = %d .\n",
1266 /* The loads themselves. */
1267 if (memory_access_type
== VMAT_ELEMENTWISE
1268 || memory_access_type
== VMAT_GATHER_SCATTER
)
1270 /* N scalar loads plus gathering them into a vector. */
1271 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1272 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1273 inside_cost
+= record_stmt_cost (cost_vec
,
1274 ncopies
* assumed_nunits
,
1275 scalar_load
, stmt_info
, 0, vect_body
);
1278 vect_get_load_cost (vinfo
, stmt_info
, ncopies
, first_stmt_p
,
1279 &inside_cost
, &prologue_cost
,
1280 cost_vec
, cost_vec
, true);
1281 if (memory_access_type
== VMAT_ELEMENTWISE
1282 || memory_access_type
== VMAT_STRIDED_SLP
)
1283 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1284 stmt_info
, 0, vect_body
);
1286 if (dump_enabled_p ())
1287 dump_printf_loc (MSG_NOTE
, vect_location
,
1288 "vect_model_load_cost: inside_cost = %d, "
1289 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1293 /* Calculate cost of DR's memory access. */
1295 vect_get_load_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1296 bool add_realign_cost
, unsigned int *inside_cost
,
1297 unsigned int *prologue_cost
,
1298 stmt_vector_for_cost
*prologue_cost_vec
,
1299 stmt_vector_for_cost
*body_cost_vec
,
1300 bool record_prologue_costs
)
1302 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1303 int alignment_support_scheme
1304 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1306 switch (alignment_support_scheme
)
1310 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1311 stmt_info
, 0, vect_body
);
1313 if (dump_enabled_p ())
1314 dump_printf_loc (MSG_NOTE
, vect_location
,
1315 "vect_model_load_cost: aligned.\n");
1319 case dr_unaligned_supported
:
1321 /* Here, we assign an additional cost for the unaligned load. */
1322 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1323 unaligned_load
, stmt_info
,
1324 DR_MISALIGNMENT (dr_info
),
1327 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE
, vect_location
,
1329 "vect_model_load_cost: unaligned supported by "
1334 case dr_explicit_realign
:
1336 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1337 vector_load
, stmt_info
, 0, vect_body
);
1338 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1339 vec_perm
, stmt_info
, 0, vect_body
);
1341 /* FIXME: If the misalignment remains fixed across the iterations of
1342 the containing loop, the following cost should be added to the
1344 if (targetm
.vectorize
.builtin_mask_for_load
)
1345 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1346 stmt_info
, 0, vect_body
);
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE
, vect_location
,
1350 "vect_model_load_cost: explicit realign\n");
1354 case dr_explicit_realign_optimized
:
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_NOTE
, vect_location
,
1358 "vect_model_load_cost: unaligned software "
1361 /* Unaligned software pipeline has a load of an address, an initial
1362 load, and possibly a mask operation to "prime" the loop. However,
1363 if this is an access in a group of loads, which provide grouped
1364 access, then the above cost should only be considered for one
1365 access in the group. Inside the loop, there is a load op
1366 and a realignment op. */
1368 if (add_realign_cost
&& record_prologue_costs
)
1370 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1371 vector_stmt
, stmt_info
,
1373 if (targetm
.vectorize
.builtin_mask_for_load
)
1374 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1375 vector_stmt
, stmt_info
,
1379 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1380 stmt_info
, 0, vect_body
);
1381 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1382 stmt_info
, 0, vect_body
);
1384 if (dump_enabled_p ())
1385 dump_printf_loc (MSG_NOTE
, vect_location
,
1386 "vect_model_load_cost: explicit realign optimized"
1392 case dr_unaligned_unsupported
:
1394 *inside_cost
= VECT_MAX_COST
;
1396 if (dump_enabled_p ())
1397 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1398 "vect_model_load_cost: unsupported access.\n");
1407 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1408 the loop preheader for the vectorized stmt STMT_VINFO. */
1411 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1412 gimple_stmt_iterator
*gsi
)
1415 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1418 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1422 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1426 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1429 pe
= loop_preheader_edge (loop
);
1430 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1431 gcc_assert (!new_bb
);
1435 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
1437 gimple_stmt_iterator gsi_bb_start
;
1439 gcc_assert (bb_vinfo
);
1440 bb
= BB_VINFO_BB (bb_vinfo
);
1441 gsi_bb_start
= gsi_after_labels (bb
);
1442 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE
, vect_location
,
1448 "created new init_stmt: %G", new_stmt
);
1451 /* Function vect_init_vector.
1453 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1454 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1455 vector type a vector with all elements equal to VAL is created first.
1456 Place the initialization at GSI if it is not NULL. Otherwise, place the
1457 initialization at the loop preheader.
1458 Return the DEF of INIT_STMT.
1459 It will be used in the vectorization of STMT_INFO. */
1462 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1463 gimple_stmt_iterator
*gsi
)
1468 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1469 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1471 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1472 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1474 /* Scalar boolean value should be transformed into
1475 all zeros or all ones value before building a vector. */
1476 if (VECTOR_BOOLEAN_TYPE_P (type
))
1478 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1479 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1481 if (CONSTANT_CLASS_P (val
))
1482 val
= integer_zerop (val
) ? false_val
: true_val
;
1485 new_temp
= make_ssa_name (TREE_TYPE (type
));
1486 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1487 val
, true_val
, false_val
);
1488 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1494 gimple_seq stmts
= NULL
;
1495 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1496 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1497 TREE_TYPE (type
), val
);
1499 /* ??? Condition vectorization expects us to do
1500 promotion of invariant/external defs. */
1501 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1502 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1503 !gsi_end_p (gsi2
); )
1505 init_stmt
= gsi_stmt (gsi2
);
1506 gsi_remove (&gsi2
, false);
1507 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1511 val
= build_vector_from_val (type
, val
);
1514 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1515 init_stmt
= gimple_build_assign (new_temp
, val
);
1516 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1520 /* Function vect_get_vec_def_for_operand_1.
1522 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1523 with type DT that will be used in the vectorized stmt. */
1526 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1527 enum vect_def_type dt
)
1530 stmt_vec_info vec_stmt_info
;
1534 /* operand is a constant or a loop invariant. */
1535 case vect_constant_def
:
1536 case vect_external_def
:
1537 /* Code should use vect_get_vec_def_for_operand. */
1540 /* Operand is defined by a loop header phi. In case of nested
1541 cycles we also may have uses of the backedge def. */
1542 case vect_reduction_def
:
1543 case vect_double_reduction_def
:
1544 case vect_nested_cycle
:
1545 case vect_induction_def
:
1546 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1547 || dt
== vect_nested_cycle
);
1550 /* operand is defined inside the loop. */
1551 case vect_internal_def
:
1553 /* Get the def from the vectorized stmt. */
1554 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1555 /* Get vectorized pattern statement. */
1557 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1558 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1559 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1560 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1561 gcc_assert (vec_stmt_info
);
1562 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1563 vec_oprnd
= PHI_RESULT (phi
);
1565 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1575 /* Function vect_get_vec_def_for_operand.
1577 OP is an operand in STMT_VINFO. This function returns a (vector) def
1578 that will be used in the vectorized stmt for STMT_VINFO.
1580 In the case that OP is an SSA_NAME which is defined in the loop, then
1581 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1583 In case OP is an invariant or constant, a new stmt that creates a vector def
1584 needs to be introduced. VECTYPE may be used to specify a required type for
1585 vector invariant. */
1588 vect_get_vec_def_for_operand (vec_info
*vinfo
,
1589 tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1592 enum vect_def_type dt
;
1594 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1596 if (dump_enabled_p ())
1597 dump_printf_loc (MSG_NOTE
, vect_location
,
1598 "vect_get_vec_def_for_operand: %T\n", op
);
1600 stmt_vec_info def_stmt_info
;
1601 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1602 &def_stmt_info
, &def_stmt
);
1603 gcc_assert (is_simple_use
);
1604 if (def_stmt
&& dump_enabled_p ())
1605 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1607 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1609 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1613 vector_type
= vectype
;
1614 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1615 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1616 vector_type
= truth_type_for (stmt_vectype
);
1618 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1620 gcc_assert (vector_type
);
1621 return vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1624 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1628 /* Function vect_get_vec_def_for_stmt_copy
1630 Return a vector-def for an operand. This function is used when the
1631 vectorized stmt to be created (by the caller to this function) is a "copy"
1632 created in case the vectorized result cannot fit in one vector, and several
1633 copies of the vector-stmt are required. In this case the vector-def is
1634 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1635 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1638 In case the vectorization factor (VF) is bigger than the number
1639 of elements that can fit in a vectype (nunits), we have to generate
1640 more than one vector stmt to vectorize the scalar stmt. This situation
1641 arises when there are multiple data-types operated upon in the loop; the
1642 smallest data-type determines the VF, and as a result, when vectorizing
1643 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1644 vector stmt (each computing a vector of 'nunits' results, and together
1645 computing 'VF' results in each iteration). This function is called when
1646 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1647 which VF=16 and nunits=4, so the number of copies required is 4):
1649 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1651 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1652 VS1.1: vx.1 = memref1 VS1.2
1653 VS1.2: vx.2 = memref2 VS1.3
1654 VS1.3: vx.3 = memref3
1656 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1657 VSnew.1: vz1 = vx.1 + ... VSnew.2
1658 VSnew.2: vz2 = vx.2 + ... VSnew.3
1659 VSnew.3: vz3 = vx.3 + ...
1661 The vectorization of S1 is explained in vectorizable_load.
1662 The vectorization of S2:
1663 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1664 the function 'vect_get_vec_def_for_operand' is called to
1665 get the relevant vector-def for each operand of S2. For operand x it
1666 returns the vector-def 'vx.0'.
1668 To create the remaining copies of the vector-stmt (VSnew.j), this
1669 function is called to get the relevant vector-def for each operand. It is
1670 obtained from the respective VS1.j stmt, which is recorded in the
1671 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1673 For example, to obtain the vector-def 'vx.1' in order to create the
1674 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1675 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1676 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1677 and return its def ('vx.1').
1678 Overall, to create the above sequence this function will be called 3 times:
1679 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1680 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1681 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1684 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1686 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1688 /* Do nothing; can reuse same def. */
1691 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1692 gcc_assert (def_stmt_info
);
1693 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1694 vec_oprnd
= PHI_RESULT (phi
);
1696 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1701 /* Get vectorized definitions for the operands to create a copy of an original
1702 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1705 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1706 vec
<tree
> *vec_oprnds0
,
1707 vec
<tree
> *vec_oprnds1
)
1709 tree vec_oprnd
= vec_oprnds0
->pop ();
1711 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1712 vec_oprnds0
->quick_push (vec_oprnd
);
1714 if (vec_oprnds1
&& vec_oprnds1
->length ())
1716 vec_oprnd
= vec_oprnds1
->pop ();
1717 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1718 vec_oprnds1
->quick_push (vec_oprnd
);
1723 /* Get vectorized definitions for OP0 and OP1. */
1726 vect_get_vec_defs (vec_info
*vinfo
, tree op0
, tree op1
, stmt_vec_info stmt_info
,
1727 vec
<tree
> *vec_oprnds0
,
1728 vec
<tree
> *vec_oprnds1
,
1733 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
1734 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
, op1
? 2 : 1);
1735 *vec_oprnds0
= vec_defs
[0];
1737 *vec_oprnds1
= vec_defs
[1];
1743 vec_oprnds0
->create (1);
1744 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op0
, stmt_info
);
1745 vec_oprnds0
->quick_push (vec_oprnd
);
1749 vec_oprnds1
->create (1);
1750 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op1
, stmt_info
);
1751 vec_oprnds1
->quick_push (vec_oprnd
);
1756 /* Helper function called by vect_finish_replace_stmt and
1757 vect_finish_stmt_generation. Set the location of the new
1758 statement and create and return a stmt_vec_info for it. */
1760 static stmt_vec_info
1761 vect_finish_stmt_generation_1 (vec_info
*vinfo
,
1762 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1764 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1766 if (dump_enabled_p ())
1767 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1769 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1771 /* While EH edges will generally prevent vectorization, stmt might
1772 e.g. be in a must-not-throw region. Ensure newly created stmts
1773 that could throw are part of the same region. */
1774 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1775 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1776 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1778 return vec_stmt_info
;
1781 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1782 which sets the same scalar result as STMT_INFO did. Create and return a
1783 stmt_vec_info for VEC_STMT. */
1786 vect_finish_replace_stmt (vec_info
*vinfo
,
1787 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1789 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1790 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1792 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1793 gsi_replace (&gsi
, vec_stmt
, true);
1795 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1798 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1799 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1802 vect_finish_stmt_generation (vec_info
*vinfo
,
1803 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1804 gimple_stmt_iterator
*gsi
)
1806 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1808 if (!gsi_end_p (*gsi
)
1809 && gimple_has_mem_ops (vec_stmt
))
1811 gimple
*at_stmt
= gsi_stmt (*gsi
);
1812 tree vuse
= gimple_vuse (at_stmt
);
1813 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1815 tree vdef
= gimple_vdef (at_stmt
);
1816 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1817 /* If we have an SSA vuse and insert a store, update virtual
1818 SSA form to avoid triggering the renamer. Do so only
1819 if we can easily see all uses - which is what almost always
1820 happens with the way vectorized stmts are inserted. */
1821 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1822 && ((is_gimple_assign (vec_stmt
)
1823 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1824 || (is_gimple_call (vec_stmt
)
1825 && !(gimple_call_flags (vec_stmt
)
1826 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1828 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1829 gimple_set_vdef (vec_stmt
, new_vdef
);
1830 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1834 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1835 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1838 /* We want to vectorize a call to combined function CFN with function
1839 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1840 as the types of all inputs. Check whether this is possible using
1841 an internal function, returning its code if so or IFN_LAST if not. */
1844 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1845 tree vectype_out
, tree vectype_in
)
1848 if (internal_fn_p (cfn
))
1849 ifn
= as_internal_fn (cfn
);
1851 ifn
= associated_internal_fn (fndecl
);
1852 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1854 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1855 if (info
.vectorizable
)
1857 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1858 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1859 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1860 OPTIMIZE_FOR_SPEED
))
1868 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1869 gimple_stmt_iterator
*);
1871 /* Check whether a load or store statement in the loop described by
1872 LOOP_VINFO is possible in a fully-masked loop. This is testing
1873 whether the vectorizer pass has the appropriate support, as well as
1874 whether the target does.
1876 VLS_TYPE says whether the statement is a load or store and VECTYPE
1877 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1878 says how the load or store is going to be implemented and GROUP_SIZE
1879 is the number of load or store statements in the containing group.
1880 If the access is a gather load or scatter store, GS_INFO describes
1881 its arguments. If the load or store is conditional, SCALAR_MASK is the
1882 condition under which it occurs.
1884 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1885 supported, otherwise record the required mask types. */
1888 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1889 vec_load_store_type vls_type
, int group_size
,
1890 vect_memory_access_type memory_access_type
,
1891 gather_scatter_info
*gs_info
, tree scalar_mask
)
1893 /* Invariant loads need no special support. */
1894 if (memory_access_type
== VMAT_INVARIANT
)
1897 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1898 machine_mode vecmode
= TYPE_MODE (vectype
);
1899 bool is_load
= (vls_type
== VLS_LOAD
);
1900 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1903 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1904 : !vect_store_lanes_supported (vectype
, group_size
, true))
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1908 "can't use a fully-masked loop because the"
1909 " target doesn't have an appropriate masked"
1910 " load/store-lanes instruction.\n");
1911 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1914 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1915 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1919 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1921 internal_fn ifn
= (is_load
1922 ? IFN_MASK_GATHER_LOAD
1923 : IFN_MASK_SCATTER_STORE
);
1924 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1925 gs_info
->memory_type
,
1926 gs_info
->offset_vectype
,
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1931 "can't use a fully-masked loop because the"
1932 " target doesn't have an appropriate masked"
1933 " gather load or scatter store instruction.\n");
1934 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1937 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1938 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1942 if (memory_access_type
!= VMAT_CONTIGUOUS
1943 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1945 /* Element X of the data must come from iteration i * VF + X of the
1946 scalar loop. We need more work to support other mappings. */
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1949 "can't use a fully-masked loop because an access"
1950 " isn't contiguous.\n");
1951 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1955 machine_mode mask_mode
;
1956 if (!VECTOR_MODE_P (vecmode
)
1957 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1958 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1960 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1962 "can't use a fully-masked loop because the target"
1963 " doesn't have the appropriate masked load or"
1965 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1968 /* We might load more scalars than we need for permuting SLP loads.
1969 We checked in get_group_load_store_type that the extra elements
1970 don't leak into a new vector. */
1971 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1972 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1973 unsigned int nvectors
;
1974 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1975 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1980 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1981 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1982 that needs to be applied to all loads and stores in a vectorized loop.
1983 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1985 MASK_TYPE is the type of both masks. If new statements are needed,
1986 insert them before GSI. */
1989 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1990 gimple_stmt_iterator
*gsi
)
1992 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1996 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1997 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1998 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1999 vec_mask
, loop_mask
);
2000 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
2004 /* Determine whether we can use a gather load or scatter store to vectorize
2005 strided load or store STMT_INFO by truncating the current offset to a
2006 smaller width. We need to be able to construct an offset vector:
2008 { 0, X, X*2, X*3, ... }
2010 without loss of precision, where X is STMT_INFO's DR_STEP.
2012 Return true if this is possible, describing the gather load or scatter
2013 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2016 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
2017 loop_vec_info loop_vinfo
, bool masked_p
,
2018 gather_scatter_info
*gs_info
)
2020 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2021 data_reference
*dr
= dr_info
->dr
;
2022 tree step
= DR_STEP (dr
);
2023 if (TREE_CODE (step
) != INTEGER_CST
)
2025 /* ??? Perhaps we could use range information here? */
2026 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_NOTE
, vect_location
,
2028 "cannot truncate variable step.\n");
2032 /* Get the number of bits in an element. */
2033 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2034 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2035 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2037 /* Set COUNT to the upper limit on the number of elements - 1.
2038 Start with the maximum vectorization factor. */
2039 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
2041 /* Try lowering COUNT to the number of scalar latch iterations. */
2042 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2043 widest_int max_iters
;
2044 if (max_loop_iterations (loop
, &max_iters
)
2045 && max_iters
< count
)
2046 count
= max_iters
.to_shwi ();
2048 /* Try scales of 1 and the element size. */
2049 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
2050 wi::overflow_type overflow
= wi::OVF_NONE
;
2051 for (int i
= 0; i
< 2; ++i
)
2053 int scale
= scales
[i
];
2055 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
2058 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
2059 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
2062 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2063 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
2065 /* Find the narrowest viable offset type. */
2066 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
2067 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
2070 /* See whether the target supports the operation with an offset
2071 no narrower than OFFSET_TYPE. */
2072 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2073 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
2074 vectype
, memory_type
, offset_type
, scale
,
2075 &gs_info
->ifn
, &gs_info
->offset_vectype
))
2078 gs_info
->decl
= NULL_TREE
;
2079 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2080 but we don't need to store that here. */
2081 gs_info
->base
= NULL_TREE
;
2082 gs_info
->element_type
= TREE_TYPE (vectype
);
2083 gs_info
->offset
= fold_convert (offset_type
, step
);
2084 gs_info
->offset_dt
= vect_constant_def
;
2085 gs_info
->scale
= scale
;
2086 gs_info
->memory_type
= memory_type
;
2090 if (overflow
&& dump_enabled_p ())
2091 dump_printf_loc (MSG_NOTE
, vect_location
,
2092 "truncating gather/scatter offset to %d bits"
2093 " might change its value.\n", element_bits
);
2098 /* Return true if we can use gather/scatter internal functions to
2099 vectorize STMT_INFO, which is a grouped or strided load or store.
2100 MASKED_P is true if load or store is conditional. When returning
2101 true, fill in GS_INFO with the information required to perform the
2105 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2106 loop_vec_info loop_vinfo
, bool masked_p
,
2107 gather_scatter_info
*gs_info
)
2109 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2111 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2114 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
2115 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2117 gcc_assert (TYPE_PRECISION (new_offset_type
)
2118 >= TYPE_PRECISION (old_offset_type
));
2119 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_NOTE
, vect_location
,
2123 "using gather/scatter for strided/grouped access,"
2124 " scale = %d\n", gs_info
->scale
);
2129 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2130 elements with a known constant step. Return -1 if that step
2131 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2134 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
2136 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2137 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
2141 /* If the target supports a permute mask that reverses the elements in
2142 a vector of type VECTYPE, return that mask, otherwise return null. */
2145 perm_mask_for_reverse (tree vectype
)
2147 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2149 /* The encoding has a single stepped pattern. */
2150 vec_perm_builder
sel (nunits
, 1, 3);
2151 for (int i
= 0; i
< 3; ++i
)
2152 sel
.quick_push (nunits
- 1 - i
);
2154 vec_perm_indices
indices (sel
, 1, nunits
);
2155 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2157 return vect_gen_perm_mask_checked (vectype
, indices
);
2160 /* A subroutine of get_load_store_type, with a subset of the same
2161 arguments. Handle the case where STMT_INFO is a load or store that
2162 accesses consecutive elements with a negative step. */
2164 static vect_memory_access_type
2165 get_negative_load_store_type (vec_info
*vinfo
,
2166 stmt_vec_info stmt_info
, tree vectype
,
2167 vec_load_store_type vls_type
,
2168 unsigned int ncopies
)
2170 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2171 dr_alignment_support alignment_support_scheme
;
2175 if (dump_enabled_p ())
2176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2177 "multiple types with negative step.\n");
2178 return VMAT_ELEMENTWISE
;
2181 alignment_support_scheme
= vect_supportable_dr_alignment (vinfo
,
2183 if (alignment_support_scheme
!= dr_aligned
2184 && alignment_support_scheme
!= dr_unaligned_supported
)
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2188 "negative step but alignment required.\n");
2189 return VMAT_ELEMENTWISE
;
2192 if (vls_type
== VLS_STORE_INVARIANT
)
2194 if (dump_enabled_p ())
2195 dump_printf_loc (MSG_NOTE
, vect_location
,
2196 "negative step with invariant source;"
2197 " no permute needed.\n");
2198 return VMAT_CONTIGUOUS_DOWN
;
2201 if (!perm_mask_for_reverse (vectype
))
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2205 "negative step and reversing not supported.\n");
2206 return VMAT_ELEMENTWISE
;
2209 return VMAT_CONTIGUOUS_REVERSE
;
2212 /* STMT_INFO is either a masked or unconditional store. Return the value
2216 vect_get_store_rhs (stmt_vec_info stmt_info
)
2218 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2220 gcc_assert (gimple_assign_single_p (assign
));
2221 return gimple_assign_rhs1 (assign
);
2223 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2225 internal_fn ifn
= gimple_call_internal_fn (call
);
2226 int index
= internal_fn_stored_value_index (ifn
);
2227 gcc_assert (index
>= 0);
2228 return gimple_call_arg (call
, index
);
2233 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2235 This function returns a vector type which can be composed with NETLS pieces,
2236 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2237 same vector size as the return vector. It checks target whether supports
2238 pieces-size vector mode for construction firstly, if target fails to, check
2239 pieces-size scalar mode for construction further. It returns NULL_TREE if
2240 fails to find the available composition.
2242 For example, for (vtype=V16QI, nelts=4), we can probably get:
2243 - V16QI with PTYPE V4QI.
2244 - V4SI with PTYPE SI.
2248 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2250 gcc_assert (VECTOR_TYPE_P (vtype
));
2251 gcc_assert (known_gt (nelts
, 0U));
2253 machine_mode vmode
= TYPE_MODE (vtype
);
2254 if (!VECTOR_MODE_P (vmode
))
2257 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2258 unsigned int pbsize
;
2259 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2261 /* First check if vec_init optab supports construction from
2262 vector pieces directly. */
2263 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2264 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2266 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2267 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2268 != CODE_FOR_nothing
))
2270 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2274 /* Otherwise check if exists an integer type of the same piece size and
2275 if vec_init optab supports construction from it directly. */
2276 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2277 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2278 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2279 != CODE_FOR_nothing
))
2281 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2282 return build_vector_type (*ptype
, nelts
);
2289 /* A subroutine of get_load_store_type, with a subset of the same
2290 arguments. Handle the case where STMT_INFO is part of a grouped load
2293 For stores, the statements in the group are all consecutive
2294 and there is no gap at the end. For loads, the statements in the
2295 group might not be consecutive; there can be gaps between statements
2296 as well as at the end. */
2299 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2300 tree vectype
, bool slp
,
2301 bool masked_p
, vec_load_store_type vls_type
,
2302 vect_memory_access_type
*memory_access_type
,
2303 gather_scatter_info
*gs_info
)
2305 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2306 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2307 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2308 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2309 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2310 bool single_element_p
= (stmt_info
== first_stmt_info
2311 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2312 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2313 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2315 /* True if the vectorized statements would access beyond the last
2316 statement in the group. */
2317 bool overrun_p
= false;
2319 /* True if we can cope with such overrun by peeling for gaps, so that
2320 there is at least one final scalar iteration after the vector loop. */
2321 bool can_overrun_p
= (!masked_p
2322 && vls_type
== VLS_LOAD
2326 /* There can only be a gap at the end of the group if the stride is
2327 known at compile time. */
2328 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2330 /* Stores can't yet have gaps. */
2331 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2335 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2337 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2338 separated by the stride, until we have a complete vector.
2339 Fall back to scalar accesses if that isn't possible. */
2340 if (multiple_p (nunits
, group_size
))
2341 *memory_access_type
= VMAT_STRIDED_SLP
;
2343 *memory_access_type
= VMAT_ELEMENTWISE
;
2347 overrun_p
= loop_vinfo
&& gap
!= 0;
2348 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2351 "Grouped store with gaps requires"
2352 " non-consecutive accesses\n");
2355 /* An overrun is fine if the trailing elements are smaller
2356 than the alignment boundary B. Every vector access will
2357 be a multiple of B and so we are guaranteed to access a
2358 non-gap element in the same B-sized block. */
2360 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2361 / vect_get_scalar_dr_size (first_dr_info
)))
2364 /* If the gap splits the vector in half and the target
2365 can do half-vector operations avoid the epilogue peeling
2366 by simply loading half of the vector only. Usually
2367 the construction with an upper zero half will be elided. */
2368 dr_alignment_support alignment_support_scheme
;
2372 && (((alignment_support_scheme
2373 = vect_supportable_dr_alignment (vinfo
,
2374 first_dr_info
, false)))
2376 || alignment_support_scheme
== dr_unaligned_supported
)
2377 && known_eq (nunits
, (group_size
- gap
) * 2)
2378 && known_eq (nunits
, group_size
)
2379 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2383 if (overrun_p
&& !can_overrun_p
)
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "Peeling for outer loop is not supported\n");
2390 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2392 *memory_access_type
= get_negative_load_store_type
2393 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2396 gcc_assert (!loop_vinfo
|| cmp
> 0);
2397 *memory_access_type
= VMAT_CONTIGUOUS
;
2403 /* We can always handle this case using elementwise accesses,
2404 but see if something more efficient is available. */
2405 *memory_access_type
= VMAT_ELEMENTWISE
;
2407 /* If there is a gap at the end of the group then these optimizations
2408 would access excess elements in the last iteration. */
2409 bool would_overrun_p
= (gap
!= 0);
2410 /* An overrun is fine if the trailing elements are smaller than the
2411 alignment boundary B. Every vector access will be a multiple of B
2412 and so we are guaranteed to access a non-gap element in the
2413 same B-sized block. */
2416 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2417 / vect_get_scalar_dr_size (first_dr_info
)))
2418 would_overrun_p
= false;
2420 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2421 && (can_overrun_p
|| !would_overrun_p
)
2422 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2424 /* First cope with the degenerate case of a single-element
2426 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2427 *memory_access_type
= VMAT_CONTIGUOUS
;
2429 /* Otherwise try using LOAD/STORE_LANES. */
2430 if (*memory_access_type
== VMAT_ELEMENTWISE
2431 && (vls_type
== VLS_LOAD
2432 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2433 : vect_store_lanes_supported (vectype
, group_size
,
2436 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2437 overrun_p
= would_overrun_p
;
2440 /* If that fails, try using permuting loads. */
2441 if (*memory_access_type
== VMAT_ELEMENTWISE
2442 && (vls_type
== VLS_LOAD
2443 ? vect_grouped_load_supported (vectype
, single_element_p
,
2445 : vect_grouped_store_supported (vectype
, group_size
)))
2447 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2448 overrun_p
= would_overrun_p
;
2452 /* As a last resort, trying using a gather load or scatter store.
2454 ??? Although the code can handle all group sizes correctly,
2455 it probably isn't a win to use separate strided accesses based
2456 on nearby locations. Or, even if it's a win over scalar code,
2457 it might not be a win over vectorizing at a lower VF, if that
2458 allows us to use contiguous accesses. */
2459 if (*memory_access_type
== VMAT_ELEMENTWISE
2462 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2464 *memory_access_type
= VMAT_GATHER_SCATTER
;
2467 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2469 /* STMT is the leader of the group. Check the operands of all the
2470 stmts of the group. */
2471 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2472 while (next_stmt_info
)
2474 tree op
= vect_get_store_rhs (next_stmt_info
);
2475 enum vect_def_type dt
;
2476 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2480 "use not simple.\n");
2483 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2489 gcc_assert (can_overrun_p
);
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2492 "Data access with gaps requires scalar "
2494 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2500 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2501 if there is a memory access type that the vectorized form can use,
2502 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2503 or scatters, fill in GS_INFO accordingly.
2505 SLP says whether we're performing SLP rather than loop vectorization.
2506 MASKED_P is true if the statement is conditional on a vectorized mask.
2507 VECTYPE is the vector type that the vectorized statements will use.
2508 NCOPIES is the number of vector statements that will be needed. */
2511 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2512 tree vectype
, bool slp
,
2513 bool masked_p
, vec_load_store_type vls_type
,
2514 unsigned int ncopies
,
2515 vect_memory_access_type
*memory_access_type
,
2516 gather_scatter_info
*gs_info
)
2518 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2519 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2520 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2522 *memory_access_type
= VMAT_GATHER_SCATTER
;
2523 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2525 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2526 &gs_info
->offset_dt
,
2527 &gs_info
->offset_vectype
))
2529 if (dump_enabled_p ())
2530 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2531 "%s index use not simple.\n",
2532 vls_type
== VLS_LOAD
? "gather" : "scatter");
2536 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2538 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp
, masked_p
,
2539 vls_type
, memory_access_type
, gs_info
))
2542 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2546 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2548 *memory_access_type
= VMAT_GATHER_SCATTER
;
2550 *memory_access_type
= VMAT_ELEMENTWISE
;
2554 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2556 *memory_access_type
= get_negative_load_store_type
2557 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2560 gcc_assert (vls_type
== VLS_LOAD
);
2561 *memory_access_type
= VMAT_INVARIANT
;
2564 *memory_access_type
= VMAT_CONTIGUOUS
;
2567 if ((*memory_access_type
== VMAT_ELEMENTWISE
2568 || *memory_access_type
== VMAT_STRIDED_SLP
)
2569 && !nunits
.is_constant ())
2571 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2573 "Not using elementwise accesses due to variable "
2574 "vectorization factor.\n");
2578 /* FIXME: At the moment the cost model seems to underestimate the
2579 cost of using elementwise accesses. This check preserves the
2580 traditional behavior until that can be fixed. */
2581 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2582 if (!first_stmt_info
)
2583 first_stmt_info
= stmt_info
;
2584 if (*memory_access_type
== VMAT_ELEMENTWISE
2585 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2586 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2587 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2588 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2590 if (dump_enabled_p ())
2591 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2592 "not falling back to elementwise accesses\n");
2598 /* Return true if boolean argument MASK is suitable for vectorizing
2599 conditional operation STMT_INFO. When returning true, store the type
2600 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2601 in *MASK_VECTYPE_OUT. */
2604 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree mask
,
2605 vect_def_type
*mask_dt_out
,
2606 tree
*mask_vectype_out
)
2608 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2612 "mask argument is not a boolean.\n");
2616 if (TREE_CODE (mask
) != SSA_NAME
)
2618 if (dump_enabled_p ())
2619 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2620 "mask argument is not an SSA name.\n");
2624 enum vect_def_type mask_dt
;
2626 if (!vect_is_simple_use (mask
, vinfo
, &mask_dt
, &mask_vectype
))
2628 if (dump_enabled_p ())
2629 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2630 "mask use not simple.\n");
2634 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2636 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2638 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2642 "could not find an appropriate vector mask type.\n");
2646 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2647 TYPE_VECTOR_SUBPARTS (vectype
)))
2649 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2651 "vector mask type %T"
2652 " does not match vector data type %T.\n",
2653 mask_vectype
, vectype
);
2658 *mask_dt_out
= mask_dt
;
2659 *mask_vectype_out
= mask_vectype
;
2663 /* Return true if stored value RHS is suitable for vectorizing store
2664 statement STMT_INFO. When returning true, store the type of the
2665 definition in *RHS_DT_OUT, the type of the vectorized store value in
2666 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2669 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree rhs
,
2670 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2671 vec_load_store_type
*vls_type_out
)
2673 /* In the case this is a store from a constant make sure
2674 native_encode_expr can handle it. */
2675 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2677 if (dump_enabled_p ())
2678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2679 "cannot encode constant as a byte sequence.\n");
2683 enum vect_def_type rhs_dt
;
2685 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_dt
, &rhs_vectype
))
2687 if (dump_enabled_p ())
2688 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2689 "use not simple.\n");
2693 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2694 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2696 if (dump_enabled_p ())
2697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2698 "incompatible vector types.\n");
2702 *rhs_dt_out
= rhs_dt
;
2703 *rhs_vectype_out
= rhs_vectype
;
2704 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2705 *vls_type_out
= VLS_STORE_INVARIANT
;
2707 *vls_type_out
= VLS_STORE
;
2711 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2712 Note that we support masks with floating-point type, in which case the
2713 floats are interpreted as a bitmask. */
2716 vect_build_all_ones_mask (vec_info
*vinfo
,
2717 stmt_vec_info stmt_info
, tree masktype
)
2719 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2720 return build_int_cst (masktype
, -1);
2721 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2723 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2724 mask
= build_vector_from_val (masktype
, mask
);
2725 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2727 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2731 for (int j
= 0; j
< 6; ++j
)
2733 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2734 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2735 mask
= build_vector_from_val (masktype
, mask
);
2736 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2741 /* Build an all-zero merge value of type VECTYPE while vectorizing
2742 STMT_INFO as a gather load. */
2745 vect_build_zero_merge_argument (vec_info
*vinfo
,
2746 stmt_vec_info stmt_info
, tree vectype
)
2749 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2750 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2755 for (int j
= 0; j
< 6; ++j
)
2757 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2758 merge
= build_real (TREE_TYPE (vectype
), r
);
2762 merge
= build_vector_from_val (vectype
, merge
);
2763 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2766 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2767 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2768 the gather load operation. If the load is conditional, MASK is the
2769 unvectorized condition and MASK_DT is its definition type, otherwise
2773 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2774 gimple_stmt_iterator
*gsi
,
2775 stmt_vec_info
*vec_stmt
,
2776 gather_scatter_info
*gs_info
,
2779 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2780 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2781 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2782 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2783 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2784 edge pe
= loop_preheader_edge (loop
);
2785 enum { NARROW
, NONE
, WIDEN
} modifier
;
2786 poly_uint64 gather_off_nunits
2787 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2789 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2790 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2791 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2792 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2793 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2794 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2795 tree scaletype
= TREE_VALUE (arglist
);
2796 tree real_masktype
= masktype
;
2797 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2799 || TREE_CODE (masktype
) == INTEGER_TYPE
2800 || types_compatible_p (srctype
, masktype
)));
2801 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2802 masktype
= truth_type_for (srctype
);
2804 tree mask_halftype
= masktype
;
2805 tree perm_mask
= NULL_TREE
;
2806 tree mask_perm_mask
= NULL_TREE
;
2807 if (known_eq (nunits
, gather_off_nunits
))
2809 else if (known_eq (nunits
* 2, gather_off_nunits
))
2813 /* Currently widening gathers and scatters are only supported for
2814 fixed-length vectors. */
2815 int count
= gather_off_nunits
.to_constant ();
2816 vec_perm_builder
sel (count
, count
, 1);
2817 for (int i
= 0; i
< count
; ++i
)
2818 sel
.quick_push (i
| (count
/ 2));
2820 vec_perm_indices
indices (sel
, 1, count
);
2821 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2824 else if (known_eq (nunits
, gather_off_nunits
* 2))
2828 /* Currently narrowing gathers and scatters are only supported for
2829 fixed-length vectors. */
2830 int count
= nunits
.to_constant ();
2831 vec_perm_builder
sel (count
, count
, 1);
2832 sel
.quick_grow (count
);
2833 for (int i
= 0; i
< count
; ++i
)
2834 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2835 vec_perm_indices
indices (sel
, 2, count
);
2836 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2840 if (mask
&& masktype
== real_masktype
)
2842 for (int i
= 0; i
< count
; ++i
)
2843 sel
[i
] = i
| (count
/ 2);
2844 indices
.new_vector (sel
, 2, count
);
2845 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2848 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2853 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2854 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2856 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2857 if (!is_gimple_min_invariant (ptr
))
2860 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2861 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2862 gcc_assert (!new_bb
);
2865 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2867 tree vec_oprnd0
= NULL_TREE
;
2868 tree vec_mask
= NULL_TREE
;
2869 tree src_op
= NULL_TREE
;
2870 tree mask_op
= NULL_TREE
;
2871 tree prev_res
= NULL_TREE
;
2872 stmt_vec_info prev_stmt_info
= NULL
;
2876 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2877 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2880 for (int j
= 0; j
< ncopies
; ++j
)
2883 if (modifier
== WIDEN
&& (j
& 1))
2884 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2885 perm_mask
, stmt_info
, gsi
);
2888 = vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
);
2890 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2893 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2895 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2896 TYPE_VECTOR_SUBPARTS (idxtype
)));
2897 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2898 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2899 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2900 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2906 if (mask_perm_mask
&& (j
& 1))
2907 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2908 mask_perm_mask
, stmt_info
, gsi
);
2912 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
);
2913 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2914 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2918 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2920 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2921 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2922 gcc_assert (known_eq (sub1
, sub2
));
2923 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2924 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2926 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2927 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2931 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2933 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2935 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2936 : VEC_UNPACK_LO_EXPR
,
2938 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2944 tree mask_arg
= mask_op
;
2945 if (masktype
!= real_masktype
)
2947 tree utype
, optype
= TREE_TYPE (mask_op
);
2948 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2949 utype
= real_masktype
;
2951 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2952 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2953 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2955 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2956 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2958 if (!useless_type_conversion_p (real_masktype
, utype
))
2960 gcc_assert (TYPE_PRECISION (utype
)
2961 <= TYPE_PRECISION (real_masktype
));
2962 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2963 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2964 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2967 src_op
= build_zero_cst (srctype
);
2969 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2972 stmt_vec_info new_stmt_info
;
2973 if (!useless_type_conversion_p (vectype
, rettype
))
2975 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2976 TYPE_VECTOR_SUBPARTS (rettype
)));
2977 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2978 gimple_call_set_lhs (new_call
, op
);
2979 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2980 var
= make_ssa_name (vec_dest
);
2981 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2982 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2984 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2988 var
= make_ssa_name (vec_dest
, new_call
);
2989 gimple_call_set_lhs (new_call
, var
);
2991 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2994 if (modifier
== NARROW
)
3001 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
3003 new_stmt_info
= loop_vinfo
->lookup_def (var
);
3006 if (prev_stmt_info
== NULL
)
3007 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3009 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3010 prev_stmt_info
= new_stmt_info
;
3014 /* Prepare the base and offset in GS_INFO for vectorization.
3015 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3016 to the vectorized offset argument for the first copy of STMT_INFO.
3017 STMT_INFO is the statement described by GS_INFO and LOOP is the
3021 vect_get_gather_scatter_ops (vec_info
*vinfo
,
3022 class loop
*loop
, stmt_vec_info stmt_info
,
3023 gather_scatter_info
*gs_info
,
3024 tree
*dataref_ptr
, tree
*vec_offset
)
3026 gimple_seq stmts
= NULL
;
3027 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
3031 edge pe
= loop_preheader_edge (loop
);
3032 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3033 gcc_assert (!new_bb
);
3035 *vec_offset
= vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
,
3036 gs_info
->offset_vectype
);
3039 /* Prepare to implement a grouped or strided load or store using
3040 the gather load or scatter store operation described by GS_INFO.
3041 STMT_INFO is the load or store statement.
3043 Set *DATAREF_BUMP to the amount that should be added to the base
3044 address after each copy of the vectorized statement. Set *VEC_OFFSET
3045 to an invariant offset vector in which element I has the value
3046 I * DR_STEP / SCALE. */
3049 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3050 loop_vec_info loop_vinfo
,
3051 gather_scatter_info
*gs_info
,
3052 tree
*dataref_bump
, tree
*vec_offset
)
3054 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3055 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3056 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3059 tree bump
= size_binop (MULT_EXPR
,
3060 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3061 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3062 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
3064 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3066 /* The offset given in GS_INFO can have pointer type, so use the element
3067 type of the vector instead. */
3068 tree offset_type
= TREE_TYPE (gs_info
->offset
);
3069 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3071 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3072 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3073 ssize_int (gs_info
->scale
));
3074 step
= fold_convert (offset_type
, step
);
3075 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
3077 /* Create {0, X, X*2, X*3, ...}. */
3078 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3079 build_zero_cst (offset_type
), step
);
3081 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3084 /* Return the amount that should be added to a vector pointer to move
3085 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3086 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3090 vect_get_data_ptr_increment (vec_info
*vinfo
,
3091 dr_vec_info
*dr_info
, tree aggr_type
,
3092 vect_memory_access_type memory_access_type
)
3094 if (memory_access_type
== VMAT_INVARIANT
)
3095 return size_zero_node
;
3097 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3098 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3099 if (tree_int_cst_sgn (step
) == -1)
3100 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
/* NOTE(review): this region is a lossy extraction -- several original
   lines (braces, early `return false' paths, dump messages) are missing,
   so the comments below describe only what the surviving lines show.
   bswap is vectorized as a byte permutation: view-convert the input to a
   same-sized vector of chars, apply a reversing VEC_PERM, convert back.  */
3104 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3107 vectorizable_bswap (vec_info
*vinfo
,
3108 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3109 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3110 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3113 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3114 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
/* The single scalar argument being byte-swapped.  */
3117 op
= gimple_call_arg (stmt
, 0);
3118 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3119 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3121 /* Multiple types in SLP are handled by creating the appropriate number of
3122 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3127 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3129 gcc_assert (ncopies
>= 1);
/* A same-sized char vector is needed to express the byte permutation.  */
3131 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3135 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3136 unsigned word_bytes
;
3137 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3140 /* The encoding uses one stepped pattern for each byte in the word. */
3141 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3142 for (unsigned i
= 0; i
< 3; ++i
)
3143 for (unsigned j
= 0; j
< word_bytes
; ++j
)
/* Indices reverse the bytes within each word-sized element.  */
3144 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3146 vec_perm_indices
indices (elts
, 1, num_bytes
);
/* The target must support this permutation as a constant mask.  */
3147 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
/* Analysis phase: record the stmt type and the permute/prologue costs.  */
3152 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3153 DUMP_VECT_SCOPE ("vectorizable_bswap");
3156 record_stmt_cost (cost_vec
,
3157 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3158 record_stmt_cost (cost_vec
,
3159 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
/* Transform phase: materialize the permutation mask as a tree constant.  */
3164 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3167 vec
<tree
> vec_oprnds
= vNULL
;
3168 stmt_vec_info new_stmt_info
= NULL
;
3169 stmt_vec_info prev_stmt_info
= NULL
;
3170 for (unsigned j
= 0; j
< ncopies
; j
++)
3174 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
3177 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3179 /* Arguments are ready. create the new vector stmt. */
3182 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* VIEW_CONVERT to chars, permute, then VIEW_CONVERT back to VECTYPE.  */
3185 tree tem
= make_ssa_name (char_vectype
);
3186 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3187 char_vectype
, vop
));
3188 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3189 tree tem2
= make_ssa_name (char_vectype
);
3190 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3191 tem
, tem
, bswap_vconst
);
3192 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3193 tem
= make_ssa_name (vectype
);
3194 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3197 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* For SLP the generated stmt is recorded on the SLP node; otherwise the
   copies are chained through STMT_VINFO_RELATED_STMT.  */
3199 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3206 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3208 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3210 prev_stmt_info
= new_stmt_info
;
3213 vec_oprnds
.release ();
3217 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3218 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3219 in a single step. On success, store the binary pack code in
3223 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3224 tree_code
*convert_code
)
3226 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3227 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3231 int multi_step_cvt
= 0;
3232 auto_vec
<tree
, 8> interm_types
;
3233 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3234 &code
, &multi_step_cvt
, &interm_types
)
3238 *convert_code
= code
;
/* NOTE(review): this region is a lossy extraction -- many original lines
   (braces, `return false' statements, assignments to `modifier', etc.)
   are missing.  Comments below annotate only the surviving lines; confirm
   details against the original tree-vect-stmts.c.  */
3242 /* Function vectorizable_call.
3244 Check if STMT_INFO performs a function call that can be vectorized.
3245 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3246 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3247 Return true if STMT_INFO is vectorizable in this way. */
3250 vectorizable_call (vec_info
*vinfo
,
3251 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3252 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3253 stmt_vector_for_cost
*cost_vec
)
3259 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3260 stmt_vec_info prev_stmt_info
;
3261 tree vectype_out
, vectype_in
;
3262 poly_uint64 nunits_in
;
3263 poly_uint64 nunits_out
;
3264 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3265 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3266 tree fndecl
, new_temp
, rhs_type
;
/* Definition type per argument; at most 4 args are supported (see the
   nargs check below).  */
3267 enum vect_def_type dt
[4]
3268 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3269 vect_unknown_def_type
};
3270 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3271 int ndts
= ARRAY_SIZE (dt
);
3273 auto_vec
<tree
, 8> vargs
;
3274 auto_vec
<tree
, 8> orig_vargs
;
/* Relation between input and output element counts.  */
3275 enum { NARROW
, NONE
, WIDEN
} modifier
;
3279 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3282 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3286 /* Is STMT_INFO a vectorizable call? */
3287 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3291 if (gimple_call_internal_p (stmt
)
3292 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3293 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3294 /* Handled by vectorizable_load and vectorizable_store. */
3297 if (gimple_call_lhs (stmt
) == NULL_TREE
3298 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3301 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3303 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3305 /* Process function arguments. */
3306 rhs_type
= NULL_TREE
;
3307 vectype_in
= NULL_TREE
;
3308 nargs
= gimple_call_num_args (stmt
);
3310 /* Bail out if the function has more than three arguments, we do not have
3311 interesting builtin functions to vectorize with more than two arguments
3312 except for fma. No arguments is also not good. */
3313 if (nargs
== 0 || nargs
> 4)
3316 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3317 combined_fn cfn
= gimple_call_combined_fn (stmt
)
;
3318 if (cfn
== CFN_GOMP_SIMD_LANE
)
3321 rhs_type
= unsigned_type_node
;
/* For internal functions, find which argument (if any) is the mask.  */
3325 if (internal_fn_p (cfn
))
3326 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3328 for (i
= 0; i
< nargs
; i
++)
3330 op
= gimple_call_arg (stmt
, i
);
3332 if ((int) i
== mask_opno
)
3334 if (!vect_check_scalar_mask (vinfo
,
3335 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3340 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3344 "use not simple.\n");
3348 /* We can only handle calls with arguments of the same type. */
3350 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3352 if (dump_enabled_p ())
3353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3354 "argument types differ.\n");
3358 rhs_type
= TREE_TYPE (op
);
3361 vectype_in
= vectypes
[i
];
3362 else if (vectypes
[i
]
3363 && !types_compatible_p (vectypes
[i
], vectype_in
))
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3367 "argument vector types differ.\n");
3371 /* If all arguments are external or constant defs, infer the vector type
3372 from the scalar type. */
3374 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3376 gcc_assert (vectype_in
);
3379 if (dump_enabled_p ())
3380 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3381 "no vectype for scalar type %T\n", rhs_type
);
3385 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3386 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3387 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3388 by a pack of the two vectors into an SI vector. We would need
3389 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3390 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3392 if (dump_enabled_p ())
3393 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3394 "mismatched vector sizes %T and %T\n",
3395 vectype_in
, vectype_out
);
3399 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3400 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3404 "mixed mask and nonmask vector types\n");
/* Classify the call by comparing element counts; the assignments to
   `modifier' were dropped by the extraction -- presumably WIDEN / NONE /
   NARROW for the three branches below.  TODO confirm.  */
3409 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3410 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3411 if (known_eq (nunits_in
* 2, nunits_out
))
3413 else if (known_eq (nunits_out
, nunits_in
))
3415 else if (known_eq (nunits_out
* 2, nunits_in
))
3420 /* We only handle functions that do not read or clobber memory. */
3421 if (gimple_vuse (stmt
))
3423 if (dump_enabled_p ())
3424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3425 "function reads from or writes to memory.\n");
3429 /* For now, we only vectorize functions if a target specific builtin
3430 is available. TODO -- in some cases, it might be profitable to
3431 insert the calls for pieces of the vector, in order to be able
3432 to vectorize other operations in the loop. */
3434 internal_fn ifn
= IFN_LAST
;
3435 tree callee
= gimple_call_fndecl (stmt
);
3437 /* First try using an internal function. */
3438 tree_code convert_code
= ERROR_MARK
;
3440 && (modifier
== NONE
3441 || (modifier
== NARROW
3442 && simple_integer_narrowing (vectype_out
, vectype_in
,
3444 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3447 /* If that fails, try asking for a target-specific built-in function. */
3448 if (ifn
== IFN_LAST
)
3450 if (cfn
!= CFN_LAST
)
3451 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3452 (cfn
, vectype_out
, vectype_in
);
3453 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3454 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3455 (callee
, vectype_out
, vectype_in
);
/* Neither an internal function nor a target builtin: the only remaining
   candidates are GOMP_SIMD_LANE and the bswap builtins.  */
3458 if (ifn
== IFN_LAST
&& !fndecl
)
3460 if (cfn
== CFN_GOMP_SIMD_LANE
3463 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3464 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3465 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3466 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3468 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3469 { 0, 1, 2, ... vf - 1 } vector. */
3470 gcc_assert (nargs
== 0);
3472 else if (modifier
== NONE
3473 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3474 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3475 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3476 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3477 vectype_in
, cost_vec
);
3480 if (dump_enabled_p ())
3481 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3482 "function is not vectorizable.\n");
/* NCOPIES is based on the output type for a narrowing call done via
   target builtin, otherwise on the input type.  */
3489 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3490 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3492 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3494 /* Sanity check: make sure that at least one copy of the vectorized stmt
3495 needs to be generated. */
3496 gcc_assert (ncopies
>= 1);
3498 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
/* Analysis phase: record the stmt type, its costs, and any loop mask
   requirement, then return.  */
3499 if (!vec_stmt
) /* transformation not required. */
3501 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3502 DUMP_VECT_SCOPE ("vectorizable_call");
3503 vect_model_simple_cost (vinfo
, stmt_info
,
3504 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3505 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3506 record_stmt_cost (cost_vec
, ncopies
/ 2,
3507 vec_promote_demote
, stmt_info
, 0, vect_body
);
3509 if (loop_vinfo
&& mask_opno
>= 0)
3511 unsigned int nvectors
= (slp_node
3512 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3514 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3515 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3516 vectype_out
, scalar_mask
);
/* Transform phase starts here.  */
3523 if (dump_enabled_p ())
3524 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3527 scalar_dest
= gimple_call_lhs (stmt
);
3528 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3530 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3532 stmt_vec_info new_stmt_info
= NULL
;
3533 prev_stmt_info
= NULL
;
/* Case 1: NONE modifier, or NARROW handled via a 2:1 internal fn.  */
3534 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3536 tree prev_res
= NULL_TREE
;
3537 vargs
.safe_grow (nargs
);
3538 orig_vargs
.safe_grow (nargs
);
3539 for (j
= 0; j
< ncopies
; ++j
)
3541 /* Build argument list for the vectorized call. */
3544 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3545 vec
<tree
> vec_oprnds0
;
3547 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3548 vec_oprnds0
= vec_defs
[0];
3550 /* Arguments are ready. Create the new vector stmt. */
3551 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3554 for (k
= 0; k
< nargs
; k
++)
3556 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3557 vargs
[k
] = vec_oprndsk
[i
];
3559 if (modifier
== NARROW
)
3561 /* We don't define any narrowing conditional functions
3563 gcc_assert (mask_opno
< 0);
/* Produce a half-width result, then pack two halves with CONVERT_CODE.  */
3564 tree half_res
= make_ssa_name (vectype_in
);
3566 = gimple_build_call_internal_vec (ifn
, vargs
);
3567 gimple_call_set_lhs (call
, half_res
);
3568 gimple_call_set_nothrow (call
, true);
3569 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3572 prev_res
= half_res
;
3575 new_temp
= make_ssa_name (vec_dest
);
3577 = gimple_build_assign (new_temp
, convert_code
,
3578 prev_res
, half_res
);
3580 = vect_finish_stmt_generation (vinfo
, stmt_info
,
/* In a fully-masked loop, AND in the loop mask for this vector.  */
3585 if (mask_opno
>= 0 && masked_loop_p
)
3587 unsigned int vec_num
= vec_oprnds0
.length ();
3588 /* Always true for SLP. */
3589 gcc_assert (ncopies
== 1);
3590 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3592 vargs
[mask_opno
] = prepare_load_store_mask
3593 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3597 if (ifn
!= IFN_LAST
)
3598 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3600 call
= gimple_build_call_vec (fndecl
, vargs
);
3601 new_temp
= make_ssa_name (vec_dest
, call
);
3602 gimple_call_set_lhs (call
, new_temp
);
3603 gimple_call_set_nothrow (call
, true);
3605 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3608 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3611 for (i
= 0; i
< nargs
; i
++)
3613 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3614 vec_oprndsi
.release ();
/* Non-SLP path: build the vector defs copy by copy.  */
3619 for (i
= 0; i
< nargs
; i
++)
3621 op
= gimple_call_arg (stmt
, i
);
3624 = vect_get_vec_def_for_operand (vinfo
,
3625 op
, stmt_info
, vectypes
[i
]);
3628 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3630 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3633 if (mask_opno
>= 0 && masked_loop_p
)
3635 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3638 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3639 vargs
[mask_opno
], gsi
);
/* GOMP_SIMD_LANE becomes a { 0, 1, 2, ... } index vector constant.  */
3642 if (cfn
== CFN_GOMP_SIMD_LANE
)
3644 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3646 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3647 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3648 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3649 new_temp
= make_ssa_name (vec_dest
);
3650 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3652 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3654 else if (modifier
== NARROW
)
3656 /* We don't define any narrowing conditional functions at
3658 gcc_assert (mask_opno
< 0);
3659 tree half_res
= make_ssa_name (vectype_in
);
3660 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3661 gimple_call_set_lhs (call
, half_res
);
3662 gimple_call_set_nothrow (call
, true);
3663 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3666 prev_res
= half_res
;
3669 new_temp
= make_ssa_name (vec_dest
);
3670 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3671 prev_res
, half_res
);
3673 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3678 if (ifn
!= IFN_LAST
)
3679 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3681 call
= gimple_build_call_vec (fndecl
, vargs
);
3682 new_temp
= make_ssa_name (vec_dest
, call
);
3683 gimple_call_set_lhs (call
, new_temp
);
3684 gimple_call_set_nothrow (call
, true);
3686 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
/* For NARROW only every second copy produces a result, hence `? 1 : 0'.  */
3689 if (j
== (modifier
== NARROW
? 1 : 0))
3690 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3692 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3694 prev_stmt_info
= new_stmt_info
;
/* Case 2: NARROW via a target builtin taking twice as many args.  */
3697 else if (modifier
== NARROW
)
3699 /* We don't define any narrowing conditional functions at present. */
3700 gcc_assert (mask_opno
< 0);
3701 for (j
= 0; j
< ncopies
; ++j
)
3703 /* Build argument list for the vectorized call. */
3705 vargs
.create (nargs
* 2);
3711 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3712 vec
<tree
> vec_oprnds0
;
3714 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3715 vec_oprnds0
= vec_defs
[0];
3717 /* Arguments are ready. Create the new vector stmt. */
3718 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3722 for (k
= 0; k
< nargs
; k
++)
3724 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
/* Each scalar argument contributes two consecutive vector defs.  */
3725 vargs
.quick_push (vec_oprndsk
[i
]);
3726 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3729 if (ifn
!= IFN_LAST
)
3730 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3732 call
= gimple_build_call_vec (fndecl
, vargs
);
3733 new_temp
= make_ssa_name (vec_dest
, call
);
3734 gimple_call_set_lhs (call
, new_temp
);
3735 gimple_call_set_nothrow (call
, true);
3737 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3738 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3741 for (i
= 0; i
< nargs
; i
++)
3743 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3744 vec_oprndsi
.release ();
3749 for (i
= 0; i
< nargs
; i
++)
3751 op
= gimple_call_arg (stmt
, i
);
3755 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
,
3758 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3762 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3765 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3767 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3770 vargs
.quick_push (vec_oprnd0
);
3771 vargs
.quick_push (vec_oprnd1
);
3774 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3775 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3776 gimple_call_set_lhs (new_stmt
, new_temp
);
3778 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3781 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3783 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3785 prev_stmt_info
= new_stmt_info
;
3788 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3791 /* No current target implements this case. */
3796 /* The call in STMT might prevent it from being removed in dce.
3797 We however cannot remove it here, due to the way the ssa name
3798 it defines is mapped to the new definition. So just replace
3799 rhs of the statement with something harmless. */
3804 stmt_info
= vect_orig_stmt (stmt_info
);
3805 lhs
= gimple_get_lhs (stmt_info
->stmt
);
/* Replace the scalar call with `lhs = 0' so later DCE can clean it up.  */
3808 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3809 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
/* Per-argument bookkeeping for vectorizable_simd_clone_call.
   NOTE(review): the extraction appears to have dropped some fields here
   (e.g. the operand/vectype members that vect_simd_lane_linear writes
   through `arginfo->op'); confirm against the original file.  */
3815 struct simd_call_arg_info
/* Step of a linear argument per scalar iteration; 0 when not linear.  */
3819 HOST_WIDE_INT linear_step
;
/* How the argument is defined (constant, external, internal def...).  */
3820 enum vect_def_type dt
;
/* True if the argument is linear only within a simd lane, not across
   the whole loop.  */
3822 bool simd_lane_linear
;
/* NOTE(review): lossy extraction -- the switch-case labels (presumably
   PLUS/POINTER_PLUS, MULT and conversion cases), `return' statements and
   braces are missing; comments describe only the surviving lines.  */
3825 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3826 is linear within simd lane (but not within whole loop), note it in
3830 vect_simd_lane_linear (tree op
, class loop
*loop
,
3831 struct simd_call_arg_info
*arginfo
)
3833 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only pursue `invariant_base p+ offset' definitions.  */
3835 if (!is_gimple_assign (def_stmt
)
3836 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3837 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3840 tree base
= gimple_assign_rhs1 (def_stmt
);
3841 HOST_WIDE_INT linear_step
= 0;
3842 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the SSA chain of the offset, folding constant additions into
   BASE and a single constant multiplication into LINEAR_STEP.  */
3843 while (TREE_CODE (v
) == SSA_NAME
)
3846 def_stmt
= SSA_NAME_DEF_STMT (v
);
3847 if (is_gimple_assign (def_stmt
))
3848 switch (gimple_assign_rhs_code (def_stmt
))
3851 t
= gimple_assign_rhs2 (def_stmt
);
/* Only one constant addend before the multiplication is accepted.  */
3852 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3854 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3855 v
= gimple_assign_rhs1 (def_stmt
);
3858 t
= gimple_assign_rhs2 (def_stmt
);
/* A single non-zero constant factor becomes the linear step.  */
3859 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3861 linear_step
= tree_to_shwi (t
);
3862 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversions are looked through only when they do not narrow.  */
3865 t
= gimple_assign_rhs1 (def_stmt
);
3866 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3867 || (TYPE_PRECISION (TREE_TYPE (v
))
3868 < TYPE_PRECISION (TREE_TYPE (t
))))
/* The chain must terminate in this loop's own GOMP_SIMD_LANE.  */
3877 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3879 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3880 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Success: record the step and flag the argument as lane-linear.  */
3885 arginfo
->linear_step
= linear_step
;
3887 arginfo
->simd_lane_linear
= true;
3893 /* Return the number of elements in vector type VECTYPE, which is associated
3894 with a SIMD clone. At present these vectors always have a constant
3897 static unsigned HOST_WIDE_INT
3898 simd_clone_subparts (tree vectype
)
3900 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3903 /* Function vectorizable_simd_clone_call.
3905 Check if STMT_INFO performs a function call that can be vectorized
3906 by calling a simd clone of the function.
3907 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3908 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3909 Return true if STMT_INFO is vectorizable in this way. */
3912 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3913 gimple_stmt_iterator
*gsi
,
3914 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3915 stmt_vector_for_cost
*)
3920 tree vec_oprnd0
= NULL_TREE
;
3921 stmt_vec_info prev_stmt_info
;
3923 unsigned int nunits
;
3924 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3925 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3926 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3927 tree fndecl
, new_temp
;
3929 auto_vec
<simd_call_arg_info
> arginfo
;
3930 vec
<tree
> vargs
= vNULL
;
3932 tree lhs
, rtype
, ratype
;
3933 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3935 /* Is STMT a vectorizable call? */
3936 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3940 fndecl
= gimple_call_fndecl (stmt
);
3941 if (fndecl
== NULL_TREE
)
3944 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3945 if (node
== NULL
|| node
->simd_clones
== NULL
)
3948 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3951 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3955 if (gimple_call_lhs (stmt
)
3956 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3959 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3961 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3963 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3970 /* Process function arguments. */
3971 nargs
= gimple_call_num_args (stmt
);
3973 /* Bail out if the function has zero arguments. */
3977 arginfo
.reserve (nargs
, true);
3979 for (i
= 0; i
< nargs
; i
++)
3981 simd_call_arg_info thisarginfo
;
3984 thisarginfo
.linear_step
= 0;
3985 thisarginfo
.align
= 0;
3986 thisarginfo
.op
= NULL_TREE
;
3987 thisarginfo
.simd_lane_linear
= false;
3989 op
= gimple_call_arg (stmt
, i
);
3990 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3991 &thisarginfo
.vectype
)
3992 || thisarginfo
.dt
== vect_uninitialized_def
)
3994 if (dump_enabled_p ())
3995 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3996 "use not simple.\n");
4000 if (thisarginfo
.dt
== vect_constant_def
4001 || thisarginfo
.dt
== vect_external_def
)
4002 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4005 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4006 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
4008 if (dump_enabled_p ())
4009 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4010 "vector mask arguments are not supported\n");
4015 /* For linear arguments, the analyze phase should have saved
4016 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4017 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4018 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4020 gcc_assert (vec_stmt
);
4021 thisarginfo
.linear_step
4022 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4024 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4025 thisarginfo
.simd_lane_linear
4026 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4027 == boolean_true_node
);
4028 /* If loop has been peeled for alignment, we need to adjust it. */
4029 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4030 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4031 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4033 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4034 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4035 tree opt
= TREE_TYPE (thisarginfo
.op
);
4036 bias
= fold_convert (TREE_TYPE (step
), bias
);
4037 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4039 = fold_build2 (POINTER_TYPE_P (opt
)
4040 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4041 thisarginfo
.op
, bias
);
4045 && thisarginfo
.dt
!= vect_constant_def
4046 && thisarginfo
.dt
!= vect_external_def
4048 && TREE_CODE (op
) == SSA_NAME
4049 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4051 && tree_fits_shwi_p (iv
.step
))
4053 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4054 thisarginfo
.op
= iv
.base
;
4056 else if ((thisarginfo
.dt
== vect_constant_def
4057 || thisarginfo
.dt
== vect_external_def
)
4058 && POINTER_TYPE_P (TREE_TYPE (op
)))
4059 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4060 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4062 if (POINTER_TYPE_P (TREE_TYPE (op
))
4063 && !thisarginfo
.linear_step
4065 && thisarginfo
.dt
!= vect_constant_def
4066 && thisarginfo
.dt
!= vect_external_def
4069 && TREE_CODE (op
) == SSA_NAME
)
4070 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4072 arginfo
.quick_push (thisarginfo
);
4075 unsigned HOST_WIDE_INT vf
;
4076 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
4078 if (dump_enabled_p ())
4079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4080 "not considering SIMD clones; not yet supported"
4081 " for variable-width vectors.\n");
4085 unsigned int badness
= 0;
4086 struct cgraph_node
*bestn
= NULL
;
4087 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4088 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4090 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4091 n
= n
->simdclone
->next_clone
)
4093 unsigned int this_badness
= 0;
4094 if (n
->simdclone
->simdlen
> vf
4095 || n
->simdclone
->nargs
!= nargs
)
4097 if (n
->simdclone
->simdlen
< vf
)
4098 this_badness
+= (exact_log2 (vf
)
4099 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4100 if (n
->simdclone
->inbranch
)
4101 this_badness
+= 2048;
4102 int target_badness
= targetm
.simd_clone
.usable (n
);
4103 if (target_badness
< 0)
4105 this_badness
+= target_badness
* 512;
4106 /* FORNOW: Have to add code to add the mask argument. */
4107 if (n
->simdclone
->inbranch
)
4109 for (i
= 0; i
< nargs
; i
++)
4111 switch (n
->simdclone
->args
[i
].arg_type
)
4113 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4114 if (!useless_type_conversion_p
4115 (n
->simdclone
->args
[i
].orig_type
,
4116 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4118 else if (arginfo
[i
].dt
== vect_constant_def
4119 || arginfo
[i
].dt
== vect_external_def
4120 || arginfo
[i
].linear_step
)
4123 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4124 if (arginfo
[i
].dt
!= vect_constant_def
4125 && arginfo
[i
].dt
!= vect_external_def
)
4128 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4129 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4130 if (arginfo
[i
].dt
== vect_constant_def
4131 || arginfo
[i
].dt
== vect_external_def
4132 || (arginfo
[i
].linear_step
4133 != n
->simdclone
->args
[i
].linear_step
))
4136 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4137 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4138 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4139 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4140 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4141 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4145 case SIMD_CLONE_ARG_TYPE_MASK
:
4148 if (i
== (size_t) -1)
4150 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4155 if (arginfo
[i
].align
)
4156 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4157 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4159 if (i
== (size_t) -1)
4161 if (bestn
== NULL
|| this_badness
< badness
)
4164 badness
= this_badness
;
4171 for (i
= 0; i
< nargs
; i
++)
4172 if ((arginfo
[i
].dt
== vect_constant_def
4173 || arginfo
[i
].dt
== vect_external_def
)
4174 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4176 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4177 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4179 if (arginfo
[i
].vectype
== NULL
4180 || (simd_clone_subparts (arginfo
[i
].vectype
)
4181 > bestn
->simdclone
->simdlen
))
4185 fndecl
= bestn
->decl
;
4186 nunits
= bestn
->simdclone
->simdlen
;
4187 ncopies
= vf
/ nunits
;
4189 /* If the function isn't const, only allow it in simd loops where user
4190 has asserted that at least nunits consecutive iterations can be
4191 performed using SIMD instructions. */
4192 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4193 && gimple_vuse (stmt
))
4196 /* Sanity check: make sure that at least one copy of the vectorized stmt
4197 needs to be generated. */
4198 gcc_assert (ncopies
>= 1);
4200 if (!vec_stmt
) /* transformation not required. */
4202 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4203 for (i
= 0; i
< nargs
; i
++)
4204 if ((bestn
->simdclone
->args
[i
].arg_type
4205 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4206 || (bestn
->simdclone
->args
[i
].arg_type
4207 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4209 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4211 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4212 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4213 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4214 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4215 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4216 tree sll
= arginfo
[i
].simd_lane_linear
4217 ? boolean_true_node
: boolean_false_node
;
4218 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4220 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4221 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4222 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4223 dt, slp_node, cost_vec); */
4229 if (dump_enabled_p ())
4230 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4233 scalar_dest
= gimple_call_lhs (stmt
);
4234 vec_dest
= NULL_TREE
;
4239 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4240 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4241 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4244 rtype
= TREE_TYPE (ratype
);
4248 prev_stmt_info
= NULL
;
4249 for (j
= 0; j
< ncopies
; ++j
)
4251 /* Build argument list for the vectorized call. */
4253 vargs
.create (nargs
);
4257 for (i
= 0; i
< nargs
; i
++)
4259 unsigned int k
, l
, m
, o
;
4261 op
= gimple_call_arg (stmt
, i
);
4262 switch (bestn
->simdclone
->args
[i
].arg_type
)
4264 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4265 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4266 o
= nunits
/ simd_clone_subparts (atype
);
4267 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4269 if (simd_clone_subparts (atype
)
4270 < simd_clone_subparts (arginfo
[i
].vectype
))
4272 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4273 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4274 / simd_clone_subparts (atype
));
4275 gcc_assert ((k
& (k
- 1)) == 0);
4278 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
);
4281 vec_oprnd0
= arginfo
[i
].op
;
4282 if ((m
& (k
- 1)) == 0)
4284 = vect_get_vec_def_for_stmt_copy (vinfo
,
4287 arginfo
[i
].op
= vec_oprnd0
;
4289 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4291 bitsize_int ((m
& (k
- 1)) * prec
));
4293 = gimple_build_assign (make_ssa_name (atype
),
4295 vect_finish_stmt_generation (vinfo
, stmt_info
,
4297 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4301 k
= (simd_clone_subparts (atype
)
4302 / simd_clone_subparts (arginfo
[i
].vectype
));
4303 gcc_assert ((k
& (k
- 1)) == 0);
4304 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4306 vec_alloc (ctor_elts
, k
);
4309 for (l
= 0; l
< k
; l
++)
4311 if (m
== 0 && l
== 0)
4313 = vect_get_vec_def_for_operand (vinfo
,
4317 = vect_get_vec_def_for_stmt_copy (vinfo
,
4319 arginfo
[i
].op
= vec_oprnd0
;
4322 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4326 vargs
.safe_push (vec_oprnd0
);
4329 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4331 = gimple_build_assign (make_ssa_name (atype
),
4333 vect_finish_stmt_generation (vinfo
, stmt_info
,
4335 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4340 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4341 vargs
.safe_push (op
);
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4344 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4349 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4350 &stmts
, true, NULL_TREE
);
4354 edge pe
= loop_preheader_edge (loop
);
4355 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4356 gcc_assert (!new_bb
);
4358 if (arginfo
[i
].simd_lane_linear
)
4360 vargs
.safe_push (arginfo
[i
].op
);
4363 tree phi_res
= copy_ssa_name (op
);
4364 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4365 loop_vinfo
->add_stmt (new_phi
);
4366 add_phi_arg (new_phi
, arginfo
[i
].op
,
4367 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4369 = POINTER_TYPE_P (TREE_TYPE (op
))
4370 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4371 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4372 ? sizetype
: TREE_TYPE (op
);
4374 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4376 tree tcst
= wide_int_to_tree (type
, cst
);
4377 tree phi_arg
= copy_ssa_name (op
);
4379 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4380 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4381 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4382 loop_vinfo
->add_stmt (new_stmt
);
4383 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4385 arginfo
[i
].op
= phi_res
;
4386 vargs
.safe_push (phi_res
);
4391 = POINTER_TYPE_P (TREE_TYPE (op
))
4392 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4393 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4394 ? sizetype
: TREE_TYPE (op
);
4396 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4398 tree tcst
= wide_int_to_tree (type
, cst
);
4399 new_temp
= make_ssa_name (TREE_TYPE (op
));
4401 = gimple_build_assign (new_temp
, code
,
4402 arginfo
[i
].op
, tcst
);
4403 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4404 vargs
.safe_push (new_temp
);
4407 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4408 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4409 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4411 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4412 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4418 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4421 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4423 new_temp
= create_tmp_var (ratype
);
4424 else if (simd_clone_subparts (vectype
)
4425 == simd_clone_subparts (rtype
))
4426 new_temp
= make_ssa_name (vec_dest
, new_call
);
4428 new_temp
= make_ssa_name (rtype
, new_call
);
4429 gimple_call_set_lhs (new_call
, new_temp
);
4431 stmt_vec_info new_stmt_info
4432 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4436 if (simd_clone_subparts (vectype
) < nunits
)
4439 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4440 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4441 k
= nunits
/ simd_clone_subparts (vectype
);
4442 gcc_assert ((k
& (k
- 1)) == 0);
4443 for (l
= 0; l
< k
; l
++)
4448 t
= build_fold_addr_expr (new_temp
);
4449 t
= build2 (MEM_REF
, vectype
, t
,
4450 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4453 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4454 bitsize_int (prec
), bitsize_int (l
* prec
));
4456 = gimple_build_assign (make_ssa_name (vectype
), t
);
4458 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4461 if (j
== 0 && l
== 0)
4462 STMT_VINFO_VEC_STMT (stmt_info
)
4463 = *vec_stmt
= new_stmt_info
;
4465 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4467 prev_stmt_info
= new_stmt_info
;
4471 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4474 else if (simd_clone_subparts (vectype
) > nunits
)
4476 unsigned int k
= (simd_clone_subparts (vectype
)
4477 / simd_clone_subparts (rtype
));
4478 gcc_assert ((k
& (k
- 1)) == 0);
4479 if ((j
& (k
- 1)) == 0)
4480 vec_alloc (ret_ctor_elts
, k
);
4483 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4484 for (m
= 0; m
< o
; m
++)
4486 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4487 size_int (m
), NULL_TREE
, NULL_TREE
);
4489 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4491 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4493 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4494 gimple_assign_lhs (new_stmt
));
4496 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4499 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4500 if ((j
& (k
- 1)) != k
- 1)
4502 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4504 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4506 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4508 if ((unsigned) j
== k
- 1)
4509 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4511 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4513 prev_stmt_info
= new_stmt_info
;
4518 tree t
= build_fold_addr_expr (new_temp
);
4519 t
= build2 (MEM_REF
, vectype
, t
,
4520 build_int_cst (TREE_TYPE (t
), 0));
4522 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4524 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4525 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4530 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4532 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4534 prev_stmt_info
= new_stmt_info
;
4539 /* The call in STMT might prevent it from being removed in dce.
4540 We however cannot remove it here, due to the way the ssa name
4541 it defines is mapped to the new definition. So just replace
4542 rhs of the statement with something harmless. */
4550 type
= TREE_TYPE (scalar_dest
);
4551 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4552 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4555 new_stmt
= gimple_build_nop ();
4556 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4557 unlink_stmt_vdef (stmt
);
4563 /* Function vect_gen_widened_results_half
4565 Create a vector stmt whose code, type, number of arguments, and result
4566 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4567 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4568 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4569 needs to be created (DECL is a function-decl of a target-builtin).
4570 STMT_INFO is the original scalar stmt that we are vectorizing. */
4573 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4574 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4575 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4576 stmt_vec_info stmt_info
)
4581 /* Generate half of the widened result: */
4582 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4583 if (op_type
!= binary_op
)
4585 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4586 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4587 gimple_assign_set_lhs (new_stmt
, new_temp
);
4588 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4594 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4595 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4596 containing scalar operand), and for the rest we get a copy with
4597 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4598 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4599 The vectors are collected into VEC_OPRNDS. */
4602 vect_get_loop_based_defs (vec_info
*vinfo
, tree
*oprnd
, stmt_vec_info stmt_info
,
4603 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4607 /* Get first vector operand. */
4608 /* All the vector operands except the very first one (that is scalar oprnd)
4610 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4611 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, *oprnd
, stmt_info
);
4613 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4615 vec_oprnds
->quick_push (vec_oprnd
);
4617 /* Get second vector operand. */
4618 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4619 vec_oprnds
->quick_push (vec_oprnd
);
4623 /* For conversion in multiple steps, continue to get operands
4626 vect_get_loop_based_defs (vinfo
, oprnd
, stmt_info
, vec_oprnds
,
4627 multi_step_cvt
- 1);
4631 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4632 For multi-step conversions store the resulting vectors and call the function
4636 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4638 stmt_vec_info stmt_info
,
4640 gimple_stmt_iterator
*gsi
,
4641 slp_tree slp_node
, enum tree_code code
,
4642 stmt_vec_info
*prev_stmt_info
)
4645 tree vop0
, vop1
, new_tmp
, vec_dest
;
4647 vec_dest
= vec_dsts
.pop ();
4649 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4651 /* Create demotion operation. */
4652 vop0
= (*vec_oprnds
)[i
];
4653 vop1
= (*vec_oprnds
)[i
+ 1];
4654 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4655 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4656 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4657 stmt_vec_info new_stmt_info
4658 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4661 /* Store the resulting vector for next recursive call. */
4662 (*vec_oprnds
)[i
/2] = new_tmp
;
4665 /* This is the last step of the conversion sequence. Store the
4666 vectors in SLP_NODE or in vector info of the scalar statement
4667 (or in STMT_VINFO_RELATED_STMT chain). */
4669 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4672 if (!*prev_stmt_info
)
4673 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4675 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4677 *prev_stmt_info
= new_stmt_info
;
4682 /* For multi-step demotion operations we first generate demotion operations
4683 from the source type to the intermediate types, and then combine the
4684 results (stored in VEC_OPRNDS) in demotion operation to the destination
4688 /* At each level of recursion we have half of the operands we had at the
4690 vec_oprnds
->truncate ((i
+1)/2);
4691 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4693 stmt_info
, vec_dsts
, gsi
,
4694 slp_node
, VEC_PACK_TRUNC_EXPR
,
4698 vec_dsts
.quick_push (vec_dest
);
4702 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4703 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4704 STMT_INFO. For multi-step conversions store the resulting vectors and
4705 call the function recursively. */
4708 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4709 vec
<tree
> *vec_oprnds0
,
4710 vec
<tree
> *vec_oprnds1
,
4711 stmt_vec_info stmt_info
, tree vec_dest
,
4712 gimple_stmt_iterator
*gsi
,
4713 enum tree_code code1
,
4714 enum tree_code code2
, int op_type
)
4717 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4718 gimple
*new_stmt1
, *new_stmt2
;
4719 vec
<tree
> vec_tmp
= vNULL
;
4721 vec_tmp
.create (vec_oprnds0
->length () * 2);
4722 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4724 if (op_type
== binary_op
)
4725 vop1
= (*vec_oprnds1
)[i
];
4729 /* Generate the two halves of promotion operation. */
4730 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4731 op_type
, vec_dest
, gsi
,
4733 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4734 op_type
, vec_dest
, gsi
,
4736 if (is_gimple_call (new_stmt1
))
4738 new_tmp1
= gimple_call_lhs (new_stmt1
);
4739 new_tmp2
= gimple_call_lhs (new_stmt2
);
4743 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4744 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4747 /* Store the results for the next step. */
4748 vec_tmp
.quick_push (new_tmp1
);
4749 vec_tmp
.quick_push (new_tmp2
);
4752 vec_oprnds0
->release ();
4753 *vec_oprnds0
= vec_tmp
;
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760 Return true if STMT_INFO is vectorizable in this way. */
4763 vectorizable_conversion (vec_info
*vinfo
,
4764 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4765 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4766 stmt_vector_for_cost
*cost_vec
)
4770 tree op0
, op1
= NULL_TREE
;
4771 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4772 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4773 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4774 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4776 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4778 stmt_vec_info prev_stmt_info
;
4779 poly_uint64 nunits_in
;
4780 poly_uint64 nunits_out
;
4781 tree vectype_out
, vectype_in
;
4783 tree lhs_type
, rhs_type
;
4784 enum { NARROW
, NONE
, WIDEN
} modifier
;
4785 vec
<tree
> vec_oprnds0
= vNULL
;
4786 vec
<tree
> vec_oprnds1
= vNULL
;
4788 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4789 int multi_step_cvt
= 0;
4790 vec
<tree
> interm_types
= vNULL
;
4791 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4793 unsigned short fltsz
;
4795 /* Is STMT a vectorizable conversion? */
4797 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4800 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4804 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4808 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4811 code
= gimple_assign_rhs_code (stmt
);
4812 if (!CONVERT_EXPR_CODE_P (code
)
4813 && code
!= FIX_TRUNC_EXPR
4814 && code
!= FLOAT_EXPR
4815 && code
!= WIDEN_MULT_EXPR
4816 && code
!= WIDEN_LSHIFT_EXPR
)
4819 op_type
= TREE_CODE_LENGTH (code
);
4821 /* Check types of lhs and rhs. */
4822 scalar_dest
= gimple_assign_lhs (stmt
);
4823 lhs_type
= TREE_TYPE (scalar_dest
);
4824 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4826 op0
= gimple_assign_rhs1 (stmt
);
4827 rhs_type
= TREE_TYPE (op0
);
4829 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4830 && !((INTEGRAL_TYPE_P (lhs_type
)
4831 && INTEGRAL_TYPE_P (rhs_type
))
4832 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4833 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4836 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4837 && ((INTEGRAL_TYPE_P (lhs_type
)
4838 && !type_has_mode_precision_p (lhs_type
))
4839 || (INTEGRAL_TYPE_P (rhs_type
)
4840 && !type_has_mode_precision_p (rhs_type
))))
4842 if (dump_enabled_p ())
4843 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4844 "type conversion to/from bit-precision unsupported."
4849 /* Check the operands of the operation. */
4850 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4852 if (dump_enabled_p ())
4853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4854 "use not simple.\n");
4857 if (op_type
== binary_op
)
4861 op1
= gimple_assign_rhs2 (stmt
);
4862 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4863 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4865 if (CONSTANT_CLASS_P (op0
))
4866 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4868 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4874 "use not simple.\n");
4879 /* If op0 is an external or constant def, infer the vector type
4880 from the scalar type. */
4882 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4884 gcc_assert (vectype_in
);
4887 if (dump_enabled_p ())
4888 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4889 "no vectype for scalar type %T\n", rhs_type
);
4894 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4895 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4897 if (dump_enabled_p ())
4898 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4899 "can't convert between boolean and non "
4900 "boolean vectors %T\n", rhs_type
);
4905 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4906 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4907 if (known_eq (nunits_out
, nunits_in
))
4909 else if (multiple_p (nunits_out
, nunits_in
))
4913 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4917 /* Multiple types in SLP are handled by creating the appropriate number of
4918 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4922 else if (modifier
== NARROW
)
4923 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4925 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4927 /* Sanity check: make sure that at least one copy of the vectorized stmt
4928 needs to be generated. */
4929 gcc_assert (ncopies
>= 1);
4931 bool found_mode
= false;
4932 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4933 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4934 opt_scalar_mode rhs_mode_iter
;
4936 /* Supportable by target? */
4940 if (code
!= FIX_TRUNC_EXPR
4941 && code
!= FLOAT_EXPR
4942 && !CONVERT_EXPR_CODE_P (code
))
4944 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4948 if (dump_enabled_p ())
4949 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4950 "conversion not supported by target.\n");
4954 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4955 vectype_in
, &code1
, &code2
,
4956 &multi_step_cvt
, &interm_types
))
4958 /* Binary widening operation can only be supported directly by the
4960 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4964 if (code
!= FLOAT_EXPR
4965 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4968 fltsz
= GET_MODE_SIZE (lhs_mode
);
4969 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4971 rhs_mode
= rhs_mode_iter
.require ();
4972 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4976 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4977 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4978 if (cvt_type
== NULL_TREE
)
4981 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4983 if (!supportable_convert_operation (code
, vectype_out
,
4984 cvt_type
, &codecvt1
))
4987 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4988 vectype_out
, cvt_type
,
4989 &codecvt1
, &codecvt2
,
4994 gcc_assert (multi_step_cvt
== 0);
4996 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4998 vectype_in
, &code1
, &code2
,
4999 &multi_step_cvt
, &interm_types
))
5009 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5010 codecvt2
= ERROR_MARK
;
5014 interm_types
.safe_push (cvt_type
);
5015 cvt_type
= NULL_TREE
;
5020 gcc_assert (op_type
== unary_op
);
5021 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5022 &code1
, &multi_step_cvt
,
5026 if (code
!= FIX_TRUNC_EXPR
5027 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5031 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5032 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5033 if (cvt_type
== NULL_TREE
)
5035 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5038 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5039 &code1
, &multi_step_cvt
,
5048 if (!vec_stmt
) /* transformation not required. */
5050 DUMP_VECT_SCOPE ("vectorizable_conversion");
5051 if (modifier
== NONE
)
5053 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5054 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5057 else if (modifier
== NARROW
)
5059 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5060 /* The final packing step produces one vector result per copy. */
5061 unsigned int nvectors
5062 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5063 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5064 multi_step_cvt
, cost_vec
);
5068 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5069 /* The initial unpacking step produces two vector results
5070 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5071 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5072 unsigned int nvectors
5074 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5076 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5077 multi_step_cvt
, cost_vec
);
5079 interm_types
.release ();
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_NOTE
, vect_location
,
5086 "transform conversion. ncopies = %d.\n", ncopies
);
5088 if (op_type
== binary_op
)
5090 if (CONSTANT_CLASS_P (op0
))
5091 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5092 else if (CONSTANT_CLASS_P (op1
))
5093 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5096 /* In case of multi-step conversion, we first generate conversion operations
5097 to the intermediate types, and then from that types to the final one.
5098 We create vector destinations for the intermediate type (TYPES) received
5099 from supportable_*_operation, and store them in the correct order
5100 for future use in vect_create_vectorized_*_stmts (). */
5101 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5102 vec_dest
= vect_create_destination_var (scalar_dest
,
5103 (cvt_type
&& modifier
== WIDEN
)
5104 ? cvt_type
: vectype_out
);
5105 vec_dsts
.quick_push (vec_dest
);
5109 for (i
= interm_types
.length () - 1;
5110 interm_types
.iterate (i
, &intermediate_type
); i
--)
5112 vec_dest
= vect_create_destination_var (scalar_dest
,
5114 vec_dsts
.quick_push (vec_dest
);
5119 vec_dest
= vect_create_destination_var (scalar_dest
,
5121 ? vectype_out
: cvt_type
);
5125 if (modifier
== WIDEN
)
5127 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5128 if (op_type
== binary_op
)
5129 vec_oprnds1
.create (1);
5131 else if (modifier
== NARROW
)
5132 vec_oprnds0
.create (
5133 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5135 else if (code
== WIDEN_LSHIFT_EXPR
)
5136 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5139 prev_stmt_info
= NULL
;
5143 for (j
= 0; j
< ncopies
; j
++)
5146 vect_get_vec_defs (vinfo
, op0
, NULL
, stmt_info
, &vec_oprnds0
,
5149 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5151 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5153 stmt_vec_info new_stmt_info
;
5154 /* Arguments are ready, create the new vector stmt. */
5155 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5156 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5157 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5158 gimple_assign_set_lhs (new_stmt
, new_temp
);
5160 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5163 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5166 if (!prev_stmt_info
)
5167 STMT_VINFO_VEC_STMT (stmt_info
)
5168 = *vec_stmt
= new_stmt_info
;
5170 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5171 prev_stmt_info
= new_stmt_info
;
5178 /* In case the vectorization factor (VF) is bigger than the number
5179 of elements that we can fit in a vectype (nunits), we have to
5180 generate more than one vector stmt - i.e - we need to "unroll"
5181 the vector stmt by a factor VF/nunits. */
5182 for (j
= 0; j
< ncopies
; j
++)
5189 if (code
== WIDEN_LSHIFT_EXPR
)
5194 /* Store vec_oprnd1 for every vector stmt to be created
5195 for SLP_NODE. We check during the analysis that all
5196 the shift arguments are the same. */
5197 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5198 vec_oprnds1
.quick_push (vec_oprnd1
);
5200 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5201 &vec_oprnds0
, NULL
, slp_node
);
5204 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
5205 &vec_oprnds1
, slp_node
);
5209 vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
5211 vec_oprnds0
.quick_push (vec_oprnd0
);
5212 if (op_type
== binary_op
)
5214 if (code
== WIDEN_LSHIFT_EXPR
)
5218 = vect_get_vec_def_for_operand (vinfo
,
5220 vec_oprnds1
.quick_push (vec_oprnd1
);
5226 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5227 vec_oprnds0
.truncate (0);
5228 vec_oprnds0
.quick_push (vec_oprnd0
);
5229 if (op_type
== binary_op
)
5231 if (code
== WIDEN_LSHIFT_EXPR
)
5234 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5236 vec_oprnds1
.truncate (0);
5237 vec_oprnds1
.quick_push (vec_oprnd1
);
5241 /* Arguments are ready. Create the new vector stmts. */
5242 for (i
= multi_step_cvt
; i
>= 0; i
--)
5244 tree this_dest
= vec_dsts
[i
];
5245 enum tree_code c1
= code1
, c2
= code2
;
5246 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5251 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5252 &vec_oprnds1
, stmt_info
,
5257 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5259 stmt_vec_info new_stmt_info
;
5262 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5263 new_temp
= make_ssa_name (vec_dest
);
5265 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5267 = vect_finish_stmt_generation (vinfo
, stmt_info
,
5271 new_stmt_info
= vinfo
->lookup_def (vop0
);
5274 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5277 if (!prev_stmt_info
)
5278 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5280 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5281 prev_stmt_info
= new_stmt_info
;
5286 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5290 /* In case the vectorization factor (VF) is bigger than the number
5291 of elements that we can fit in a vectype (nunits), we have to
5292 generate more than one vector stmt - i.e - we need to "unroll"
5293 the vector stmt by a factor VF/nunits. */
5294 for (j
= 0; j
< ncopies
; j
++)
5298 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
5302 vec_oprnds0
.truncate (0);
5303 vect_get_loop_based_defs (vinfo
,
5304 &last_oprnd
, stmt_info
, &vec_oprnds0
,
5305 vect_pow2 (multi_step_cvt
) - 1);
5308 /* Arguments are ready. Create the new vector stmts. */
5310 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5312 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5313 new_temp
= make_ssa_name (vec_dest
);
5315 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5316 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5317 vec_oprnds0
[i
] = new_temp
;
5320 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5322 stmt_info
, vec_dsts
, gsi
,
5327 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5331 vec_oprnds0
.release ();
5332 vec_oprnds1
.release ();
5333 interm_types
.release ();
5338 /* Return true if we can assume from the scalar form of STMT_INFO that
5339 neither the scalar nor the vector forms will generate code. STMT_INFO
5340 is known not to involve a data reference. */
5343 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5345 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5349 tree lhs
= gimple_assign_lhs (stmt
);
5350 tree_code code
= gimple_assign_rhs_code (stmt
);
5351 tree rhs
= gimple_assign_rhs1 (stmt
);
5353 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5356 if (CONVERT_EXPR_CODE_P (code
))
5357 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5362 /* Function vectorizable_assignment.
5364 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5365 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5366 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5367 Return true if STMT_INFO is vectorizable in this way. */
5370 vectorizable_assignment (vec_info
*vinfo
,
5371 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5372 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5373 stmt_vector_for_cost
*cost_vec
)
5378 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5380 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5384 vec
<tree
> vec_oprnds
= vNULL
;
5386 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5387 stmt_vec_info prev_stmt_info
= NULL
;
5388 enum tree_code code
;
5391 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5394 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5398 /* Is vectorizable assignment? */
5399 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5403 scalar_dest
= gimple_assign_lhs (stmt
);
5404 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5407 code
= gimple_assign_rhs_code (stmt
);
5408 if (gimple_assign_single_p (stmt
)
5409 || code
== PAREN_EXPR
5410 || CONVERT_EXPR_CODE_P (code
))
5411 op
= gimple_assign_rhs1 (stmt
);
5415 if (code
== VIEW_CONVERT_EXPR
)
5416 op
= TREE_OPERAND (op
, 0);
5418 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5419 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5421 /* Multiple types in SLP are handled by creating the appropriate number of
5422 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5427 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5429 gcc_assert (ncopies
>= 1);
5431 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5433 if (dump_enabled_p ())
5434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5435 "use not simple.\n");
5439 /* We can handle NOP_EXPR conversions that do not change the number
5440 of elements or the vector size. */
5441 if ((CONVERT_EXPR_CODE_P (code
)
5442 || code
== VIEW_CONVERT_EXPR
)
5444 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5445 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5446 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5449 /* We do not handle bit-precision changes. */
5450 if ((CONVERT_EXPR_CODE_P (code
)
5451 || code
== VIEW_CONVERT_EXPR
)
5452 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5453 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5454 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5455 /* But a conversion that does not change the bit-pattern is ok. */
5456 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5457 > TYPE_PRECISION (TREE_TYPE (op
)))
5458 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5459 /* Conversion between boolean types of different sizes is
5460 a simple assignment in case their vectypes are same
5462 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5463 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5465 if (dump_enabled_p ())
5466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5467 "type conversion to/from bit-precision "
5472 if (!vec_stmt
) /* transformation not required. */
5474 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5475 DUMP_VECT_SCOPE ("vectorizable_assignment");
5476 if (!vect_nop_conversion_p (stmt_info
))
5477 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5483 if (dump_enabled_p ())
5484 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5487 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5490 for (j
= 0; j
< ncopies
; j
++)
5494 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
5497 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5499 /* Arguments are ready. create the new vector stmt. */
5500 stmt_vec_info new_stmt_info
= NULL
;
5501 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5503 if (CONVERT_EXPR_CODE_P (code
)
5504 || code
== VIEW_CONVERT_EXPR
)
5505 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5506 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5507 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5508 gimple_assign_set_lhs (new_stmt
, new_temp
);
5510 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5512 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5519 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5521 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5523 prev_stmt_info
= new_stmt_info
;
5526 vec_oprnds
.release ();
5531 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5532 either as shift by a scalar or by a vector. */
5535 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5538 machine_mode vec_mode
;
5543 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5547 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5549 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5551 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5553 || (optab_handler (optab
, TYPE_MODE (vectype
))
5554 == CODE_FOR_nothing
))
5558 vec_mode
= TYPE_MODE (vectype
);
5559 icode
= (int) optab_handler (optab
, vec_mode
);
5560 if (icode
== CODE_FOR_nothing
)
5567 /* Function vectorizable_shift.
5569 Check if STMT_INFO performs a shift operation that can be vectorized.
5570 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5571 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5572 Return true if STMT_INFO is vectorizable in this way. */
5575 vectorizable_shift (vec_info
*vinfo
,
5576 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5577 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5578 stmt_vector_for_cost
*cost_vec
)
5582 tree op0
, op1
= NULL
;
5583 tree vec_oprnd1
= NULL_TREE
;
5585 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5586 enum tree_code code
;
5587 machine_mode vec_mode
;
5591 machine_mode optab_op2_mode
;
5592 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5594 stmt_vec_info prev_stmt_info
;
5595 poly_uint64 nunits_in
;
5596 poly_uint64 nunits_out
;
5601 vec
<tree
> vec_oprnds0
= vNULL
;
5602 vec
<tree
> vec_oprnds1
= vNULL
;
5605 bool scalar_shift_arg
= true;
5606 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5607 bool incompatible_op1_vectype_p
= false;
5609 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5612 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5613 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5617 /* Is STMT a vectorizable binary/unary operation? */
5618 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5622 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5625 code
= gimple_assign_rhs_code (stmt
);
5627 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5628 || code
== RROTATE_EXPR
))
5631 scalar_dest
= gimple_assign_lhs (stmt
);
5632 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5633 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5635 if (dump_enabled_p ())
5636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5637 "bit-precision shifts not supported.\n");
5641 op0
= gimple_assign_rhs1 (stmt
);
5642 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5646 "use not simple.\n");
5649 /* If op0 is an external or constant def, infer the vector type
5650 from the scalar type. */
5652 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5654 gcc_assert (vectype
);
5657 if (dump_enabled_p ())
5658 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5659 "no vectype for scalar type\n");
5663 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5664 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5665 if (maybe_ne (nunits_out
, nunits_in
))
5668 op1
= gimple_assign_rhs2 (stmt
);
5669 stmt_vec_info op1_def_stmt_info
;
5670 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5671 &op1_def_stmt_info
))
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5675 "use not simple.\n");
5679 /* Multiple types in SLP are handled by creating the appropriate number of
5680 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5685 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5687 gcc_assert (ncopies
>= 1);
5689 /* Determine whether the shift amount is a vector, or scalar. If the
5690 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5692 if ((dt
[1] == vect_internal_def
5693 || dt
[1] == vect_induction_def
5694 || dt
[1] == vect_nested_cycle
)
5696 scalar_shift_arg
= false;
5697 else if (dt
[1] == vect_constant_def
5698 || dt
[1] == vect_external_def
5699 || dt
[1] == vect_internal_def
)
5701 /* In SLP, need to check whether the shift count is the same,
5702 in loops if it is a constant or invariant, it is always
5706 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5707 stmt_vec_info slpstmt_info
;
5709 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5711 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5712 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5713 scalar_shift_arg
= false;
5716 /* For internal SLP defs we have to make sure we see scalar stmts
5717 for all vector elements.
5718 ??? For different vectors we could resort to a different
5719 scalar shift operand but code-generation below simply always
5721 if (dt
[1] == vect_internal_def
5722 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5724 scalar_shift_arg
= false;
5727 /* If the shift amount is computed by a pattern stmt we cannot
5728 use the scalar amount directly thus give up and use a vector
5730 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5731 scalar_shift_arg
= false;
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5737 "operand mode requires invariant argument.\n");
5741 /* Vector shifted by vector. */
5742 bool was_scalar_shift_arg
= scalar_shift_arg
;
5743 if (!scalar_shift_arg
)
5745 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_NOTE
, vect_location
,
5748 "vector/vector shift/rotate found.\n");
5751 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5753 incompatible_op1_vectype_p
5754 = (op1_vectype
== NULL_TREE
5755 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5756 TYPE_VECTOR_SUBPARTS (vectype
))
5757 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5758 if (incompatible_op1_vectype_p
5760 || SLP_TREE_DEF_TYPE
5761 (SLP_TREE_CHILDREN (slp_node
)[1]) != vect_constant_def
))
5763 if (dump_enabled_p ())
5764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5765 "unusable type for last operand in"
5766 " vector/vector shift/rotate.\n");
5770 /* See if the machine has a vector shifted by scalar insn and if not
5771 then see if it has a vector shifted by vector insn. */
5774 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5776 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5778 if (dump_enabled_p ())
5779 dump_printf_loc (MSG_NOTE
, vect_location
,
5780 "vector/scalar shift/rotate found.\n");
5784 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5786 && (optab_handler (optab
, TYPE_MODE (vectype
))
5787 != CODE_FOR_nothing
))
5789 scalar_shift_arg
= false;
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE
, vect_location
,
5793 "vector/vector shift/rotate found.\n");
5795 /* Unlike the other binary operators, shifts/rotates have
5796 the rhs being int, instead of the same type as the lhs,
5797 so make sure the scalar is the right type if we are
5798 dealing with vectors of long long/long/short/char. */
5799 incompatible_op1_vectype_p
5800 = !tree_nop_conversion_p (TREE_TYPE (vectype
),
5806 /* Supportable by target? */
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5814 vec_mode
= TYPE_MODE (vectype
);
5815 icode
= (int) optab_handler (optab
, vec_mode
);
5816 if (icode
== CODE_FOR_nothing
)
5818 if (dump_enabled_p ())
5819 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5820 "op not supported by target.\n");
5821 /* Check only during analysis. */
5822 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5824 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_NOTE
, vect_location
,
5828 "proceeding using word mode.\n");
5831 /* Worthwhile without SIMD support? Check only during analysis. */
5833 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5834 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5836 if (dump_enabled_p ())
5837 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5838 "not worthwhile without SIMD support.\n");
5842 if (!vec_stmt
) /* transformation not required. */
5844 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5845 DUMP_VECT_SCOPE ("vectorizable_shift");
5846 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5847 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_NOTE
, vect_location
,
5855 "transform binary/unary operation.\n");
5857 if (incompatible_op1_vectype_p
&& !slp_node
)
5859 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5860 if (dt
[1] != vect_constant_def
)
5861 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5862 TREE_TYPE (vectype
), NULL
);
5866 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5868 prev_stmt_info
= NULL
;
5869 for (j
= 0; j
< ncopies
; j
++)
5874 if (scalar_shift_arg
)
5876 /* Vector shl and shr insn patterns can be defined with scalar
5877 operand 2 (shift operand). In this case, use constant or loop
5878 invariant op1 directly, without extending it to vector mode
5880 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5881 if (!VECTOR_MODE_P (optab_op2_mode
))
5883 if (dump_enabled_p ())
5884 dump_printf_loc (MSG_NOTE
, vect_location
,
5885 "operand 1 using scalar mode.\n");
5887 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5888 vec_oprnds1
.quick_push (vec_oprnd1
);
5891 /* Store vec_oprnd1 for every vector stmt to be created
5892 for SLP_NODE. We check during the analysis that all
5893 the shift arguments are the same.
5894 TODO: Allow different constants for different vector
5895 stmts generated for an SLP instance. */
5896 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5897 vec_oprnds1
.quick_push (vec_oprnd1
);
5901 else if (slp_node
&& incompatible_op1_vectype_p
)
5903 if (was_scalar_shift_arg
)
5905 /* If the argument was the same in all lanes create
5906 the correctly typed vector shift amount directly. */
5907 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5908 op1
= vect_init_vector (vinfo
, stmt_info
,
5909 op1
, TREE_TYPE (vectype
),
5910 !loop_vinfo
? gsi
: NULL
);
5911 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5912 !loop_vinfo
? gsi
: NULL
);
5913 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5914 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5915 vec_oprnds1
.quick_push (vec_oprnd1
);
5917 else if (dt
[1] == vect_constant_def
)
5919 /* Convert the scalar constant shift amounts in-place. */
5920 slp_tree shift
= SLP_TREE_CHILDREN (slp_node
)[1];
5921 gcc_assert (SLP_TREE_DEF_TYPE (shift
) == vect_constant_def
);
5922 for (unsigned i
= 0;
5923 i
< SLP_TREE_SCALAR_OPS (shift
).length (); ++i
)
5925 SLP_TREE_SCALAR_OPS (shift
)[i
]
5926 = fold_convert (TREE_TYPE (vectype
),
5927 SLP_TREE_SCALAR_OPS (shift
)[i
]);
5928 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift
)[i
])
5933 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5936 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5937 (a special case for certain kind of vector shifts); otherwise,
5938 operand 1 should be of a vector type (the usual case). */
5940 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5941 &vec_oprnds0
, NULL
, slp_node
);
5943 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
5944 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
5947 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5949 /* Arguments are ready. Create the new vector stmt. */
5950 stmt_vec_info new_stmt_info
= NULL
;
5951 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5953 vop1
= vec_oprnds1
[i
];
5954 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5955 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5956 gimple_assign_set_lhs (new_stmt
, new_temp
);
5958 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5960 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5967 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5969 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5970 prev_stmt_info
= new_stmt_info
;
5973 vec_oprnds0
.release ();
5974 vec_oprnds1
.release ();
5980 /* Function vectorizable_operation.
5982 Check if STMT_INFO performs a binary, unary or ternary operation that can
5984 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5985 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5986 Return true if STMT_INFO is vectorizable in this way. */
5989 vectorizable_operation (vec_info
*vinfo
,
5990 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5991 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5992 stmt_vector_for_cost
*cost_vec
)
5996 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5998 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5999 enum tree_code code
, orig_code
;
6000 machine_mode vec_mode
;
6004 bool target_support_p
;
6005 enum vect_def_type dt
[3]
6006 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6008 stmt_vec_info prev_stmt_info
;
6009 poly_uint64 nunits_in
;
6010 poly_uint64 nunits_out
;
6012 int ncopies
, vec_num
;
6014 vec
<tree
> vec_oprnds0
= vNULL
;
6015 vec
<tree
> vec_oprnds1
= vNULL
;
6016 vec
<tree
> vec_oprnds2
= vNULL
;
6017 tree vop0
, vop1
, vop2
;
6018 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6020 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6023 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6027 /* Is STMT a vectorizable binary/unary operation? */
6028 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6032 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6035 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6037 /* Shifts are handled in vectorizable_shift. */
6038 if (code
== LSHIFT_EXPR
6039 || code
== RSHIFT_EXPR
6040 || code
== LROTATE_EXPR
6041 || code
== RROTATE_EXPR
)
6044 /* Comparisons are handled in vectorizable_comparison. */
6045 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6048 /* Conditions are handled in vectorizable_condition. */
6049 if (code
== COND_EXPR
)
6052 /* For pointer addition and subtraction, we should use the normal
6053 plus and minus for the vector operation. */
6054 if (code
== POINTER_PLUS_EXPR
)
6056 if (code
== POINTER_DIFF_EXPR
)
6059 /* Support only unary or binary operations. */
6060 op_type
= TREE_CODE_LENGTH (code
);
6061 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6065 "num. args = %d (not unary/binary/ternary op).\n",
6070 scalar_dest
= gimple_assign_lhs (stmt
);
6071 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6073 /* Most operations cannot handle bit-precision types without extra
6075 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6077 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6078 /* Exception are bitwise binary operations. */
6079 && code
!= BIT_IOR_EXPR
6080 && code
!= BIT_XOR_EXPR
6081 && code
!= BIT_AND_EXPR
)
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6085 "bit-precision arithmetic not supported.\n");
6089 op0
= gimple_assign_rhs1 (stmt
);
6090 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
6092 if (dump_enabled_p ())
6093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6094 "use not simple.\n");
6097 /* If op0 is an external or constant def, infer the vector type
6098 from the scalar type. */
6101 /* For boolean type we cannot determine vectype by
6102 invariant value (don't know whether it is a vector
6103 of booleans or vector of integers). We use output
6104 vectype because operations on boolean don't change
6106 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6108 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6110 if (dump_enabled_p ())
6111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6112 "not supported operation on bool value.\n");
6115 vectype
= vectype_out
;
6118 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6122 gcc_assert (vectype
);
6125 if (dump_enabled_p ())
6126 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6127 "no vectype for scalar type %T\n",
6133 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6134 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6135 if (maybe_ne (nunits_out
, nunits_in
))
6138 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6139 if (op_type
== binary_op
|| op_type
== ternary_op
)
6141 op1
= gimple_assign_rhs2 (stmt
);
6142 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype2
))
6144 if (dump_enabled_p ())
6145 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6146 "use not simple.\n");
6150 if (op_type
== ternary_op
)
6152 op2
= gimple_assign_rhs3 (stmt
);
6153 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2], &vectype3
))
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6157 "use not simple.\n");
6162 /* Multiple types in SLP are handled by creating the appropriate number of
6163 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6168 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6172 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6176 gcc_assert (ncopies
>= 1);
6178 /* Reject attempts to combine mask types with nonmask types, e.g. if
6179 we have an AND between a (nonmask) boolean loaded from memory and
6180 a (mask) boolean result of a comparison.
6182 TODO: We could easily fix these cases up using pattern statements. */
6183 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6184 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6185 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6189 "mixed mask and nonmask vector types\n");
6193 /* Supportable by target? */
6195 vec_mode
= TYPE_MODE (vectype
);
6196 if (code
== MULT_HIGHPART_EXPR
)
6197 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6200 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6203 if (dump_enabled_p ())
6204 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6208 target_support_p
= (optab_handler (optab
, vec_mode
)
6209 != CODE_FOR_nothing
);
6212 if (!target_support_p
)
6214 if (dump_enabled_p ())
6215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6216 "op not supported by target.\n");
6217 /* Check only during analysis. */
6218 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6219 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6221 if (dump_enabled_p ())
6222 dump_printf_loc (MSG_NOTE
, vect_location
,
6223 "proceeding using word mode.\n");
6226 /* Worthwhile without SIMD support? Check only during analysis. */
6227 if (!VECTOR_MODE_P (vec_mode
)
6229 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6231 if (dump_enabled_p ())
6232 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6233 "not worthwhile without SIMD support.\n");
6237 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6238 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6239 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6241 if (!vec_stmt
) /* transformation not required. */
6243 /* If this operation is part of a reduction, a fully-masked loop
6244 should only change the active lanes of the reduction chain,
6245 keeping the inactive lanes as-is. */
6247 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
6250 if (cond_fn
== IFN_LAST
6251 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6252 OPTIMIZE_FOR_SPEED
))
6254 if (dump_enabled_p ())
6255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6256 "can't use a fully-masked loop because no"
6257 " conditional operation is available.\n");
6258 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
6261 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6265 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6266 DUMP_VECT_SCOPE ("vectorizable_operation");
6267 vect_model_simple_cost (vinfo
, stmt_info
,
6268 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6274 if (dump_enabled_p ())
6275 dump_printf_loc (MSG_NOTE
, vect_location
,
6276 "transform binary/unary operation.\n");
6278 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6280 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6281 vectors with unsigned elements, but the result is signed. So, we
6282 need to compute the MINUS_EXPR into vectype temporary and
6283 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6284 tree vec_cvt_dest
= NULL_TREE
;
6285 if (orig_code
== POINTER_DIFF_EXPR
)
6287 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6288 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6292 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6294 /* In case the vectorization factor (VF) is bigger than the number
6295 of elements that we can fit in a vectype (nunits), we have to generate
6296 more than one vector stmt - i.e - we need to "unroll" the
6297 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6298 from one copy of the vector stmt to the next, in the field
6299 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6300 stages to find the correct vector defs to be used when vectorizing
6301 stmts that use the defs of the current stmt. The example below
6302 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6303 we need to create 4 vectorized stmts):
6305 before vectorization:
6306 RELATED_STMT VEC_STMT
6310 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6312 RELATED_STMT VEC_STMT
6313 VS1_0: vx0 = memref0 VS1_1 -
6314 VS1_1: vx1 = memref1 VS1_2 -
6315 VS1_2: vx2 = memref2 VS1_3 -
6316 VS1_3: vx3 = memref3 - -
6317 S1: x = load - VS1_0
6320 step2: vectorize stmt S2 (done here):
6321 To vectorize stmt S2 we first need to find the relevant vector
6322 def for the first operand 'x'. This is, as usual, obtained from
6323 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6324 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6325 relevant vector def 'vx0'. Having found 'vx0' we can generate
6326 the vector stmt VS2_0, and as usual, record it in the
6327 STMT_VINFO_VEC_STMT of stmt S2.
6328 When creating the second copy (VS2_1), we obtain the relevant vector
6329 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6330 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6331 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6332 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6333 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6334 chain of stmts and pointers:
6335 RELATED_STMT VEC_STMT
6336 VS1_0: vx0 = memref0 VS1_1 -
6337 VS1_1: vx1 = memref1 VS1_2 -
6338 VS1_2: vx2 = memref2 VS1_3 -
6339 VS1_3: vx3 = memref3 - -
6340 S1: x = load - VS1_0
6341 VS2_0: vz0 = vx0 + v1 VS2_1 -
6342 VS2_1: vz1 = vx1 + v1 VS2_2 -
6343 VS2_2: vz2 = vx2 + v1 VS2_3 -
6344 VS2_3: vz3 = vx3 + v1 - -
6345 S2: z = x + 1 - VS2_0 */
6347 prev_stmt_info
= NULL
;
6348 for (j
= 0; j
< ncopies
; j
++)
6353 if (op_type
== binary_op
)
6354 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
6355 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
6356 else if (op_type
== ternary_op
)
6360 auto_vec
<vec
<tree
> > vec_defs(3);
6361 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
6362 vec_oprnds0
= vec_defs
[0];
6363 vec_oprnds1
= vec_defs
[1];
6364 vec_oprnds2
= vec_defs
[2];
6368 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
6369 &vec_oprnds1
, NULL
);
6370 vect_get_vec_defs (vinfo
, op2
, NULL_TREE
, stmt_info
,
6371 &vec_oprnds2
, NULL
, NULL
);
6375 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
6380 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6381 if (op_type
== ternary_op
)
6383 tree vec_oprnd
= vec_oprnds2
.pop ();
6384 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6389 /* Arguments are ready. Create the new vector stmt. */
6390 stmt_vec_info new_stmt_info
= NULL
;
6391 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6393 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6394 ? vec_oprnds1
[i
] : NULL_TREE
);
6395 vop2
= ((op_type
== ternary_op
)
6396 ? vec_oprnds2
[i
] : NULL_TREE
);
6397 if (masked_loop_p
&& reduc_idx
>= 0)
6399 /* Perform the operation on active elements only and take
6400 inactive elements from the reduction chain input. */
6402 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6403 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6404 vectype
, i
* ncopies
+ j
);
6405 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6407 new_temp
= make_ssa_name (vec_dest
, call
);
6408 gimple_call_set_lhs (call
, new_temp
);
6409 gimple_call_set_nothrow (call
, true);
6411 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6415 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6417 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6418 gimple_assign_set_lhs (new_stmt
, new_temp
);
6420 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6423 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6425 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6427 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6428 gimple_assign_set_lhs (new_stmt
, new_temp
);
6429 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
6434 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6441 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6443 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6444 prev_stmt_info
= new_stmt_info
;
6447 vec_oprnds0
.release ();
6448 vec_oprnds1
.release ();
6449 vec_oprnds2
.release ();
6454 /* A helper function to ensure data reference DR_INFO's base alignment. */
6457 ensure_base_align (dr_vec_info
*dr_info
)
6459 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6462 if (dr_info
->base_misaligned
)
6464 tree base_decl
= dr_info
->base_decl
;
6466 // We should only be able to increase the alignment of a base object if
6467 // we know what its new alignment should be at compile time.
6468 unsigned HOST_WIDE_INT align_base_to
=
6469 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6471 if (decl_in_symtab_p (base_decl
))
6472 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6473 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6475 SET_DECL_ALIGN (base_decl
, align_base_to
);
6476 DECL_USER_ALIGN (base_decl
) = 1;
6478 dr_info
->base_misaligned
= false;
6483 /* Function get_group_alias_ptr_type.
6485 Return the alias type for the group starting at FIRST_STMT_INFO. */
6488 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6490 struct data_reference
*first_dr
, *next_dr
;
6492 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6493 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6494 while (next_stmt_info
)
6496 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6497 if (get_alias_set (DR_REF (first_dr
))
6498 != get_alias_set (DR_REF (next_dr
)))
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_NOTE
, vect_location
,
6502 "conflicting alias set types.\n");
6503 return ptr_type_node
;
6505 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6507 return reference_alias_ptr_type (DR_REF (first_dr
));
6511 /* Function scan_operand_equal_p.
6513 Helper function for check_scan_store. Compare two references
6514 with .GOMP_SIMD_LANE bases. */
6517 scan_operand_equal_p (tree ref1
, tree ref2
)
6519 tree ref
[2] = { ref1
, ref2
};
6520 poly_int64 bitsize
[2], bitpos
[2];
6521 tree offset
[2], base
[2];
6522 for (int i
= 0; i
< 2; ++i
)
6525 int unsignedp
, reversep
, volatilep
= 0;
6526 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6527 &offset
[i
], &mode
, &unsignedp
,
6528 &reversep
, &volatilep
);
6529 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6531 if (TREE_CODE (base
[i
]) == MEM_REF
6532 && offset
[i
] == NULL_TREE
6533 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6535 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6536 if (is_gimple_assign (def_stmt
)
6537 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6538 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6539 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6541 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6543 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6544 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6549 if (!operand_equal_p (base
[0], base
[1], 0))
6551 if (maybe_ne (bitsize
[0], bitsize
[1]))
6553 if (offset
[0] != offset
[1])
6555 if (!offset
[0] || !offset
[1])
6557 if (!operand_equal_p (offset
[0], offset
[1], 0))
6560 for (int i
= 0; i
< 2; ++i
)
6562 step
[i
] = integer_one_node
;
6563 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6565 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6566 if (is_gimple_assign (def_stmt
)
6567 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6568 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6571 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6572 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6575 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6577 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6578 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6580 tree rhs1
= NULL_TREE
;
6581 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6583 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6584 if (gimple_assign_cast_p (def_stmt
))
6585 rhs1
= gimple_assign_rhs1 (def_stmt
);
6587 else if (CONVERT_EXPR_P (offset
[i
]))
6588 rhs1
= TREE_OPERAND (offset
[i
], 0);
6590 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6591 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6592 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6593 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6596 if (!operand_equal_p (offset
[0], offset
[1], 0)
6597 || !operand_equal_p (step
[0], step
[1], 0))
6605 enum scan_store_kind
{
6606 /* Normal permutation. */
6607 scan_store_kind_perm
,
6609 /* Whole vector left shift permutation with zero init. */
6610 scan_store_kind_lshift_zero
,
6612 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6613 scan_store_kind_lshift_cond
6616 /* Function check_scan_store.
6618 Verify if we can perform the needed permutations or whole vector shifts.
6619 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6620 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6621 to do at each step. */
6624 scan_store_can_perm_p (tree vectype
, tree init
,
6625 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6627 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6628 unsigned HOST_WIDE_INT nunits
;
6629 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6631 int units_log2
= exact_log2 (nunits
);
6632 if (units_log2
<= 0)
6636 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6637 for (i
= 0; i
<= units_log2
; ++i
)
6639 unsigned HOST_WIDE_INT j
, k
;
6640 enum scan_store_kind kind
= scan_store_kind_perm
;
6641 vec_perm_builder
sel (nunits
, nunits
, 1);
6642 sel
.quick_grow (nunits
);
6643 if (i
== units_log2
)
6645 for (j
= 0; j
< nunits
; ++j
)
6646 sel
[j
] = nunits
- 1;
6650 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6652 for (k
= 0; j
< nunits
; ++j
, ++k
)
6653 sel
[j
] = nunits
+ k
;
6655 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6656 if (!can_vec_perm_const_p (vec_mode
, indices
))
6658 if (i
== units_log2
)
6661 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6663 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6665 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6666 /* Whole vector shifts shift in zeros, so if init is all zero
6667 constant, there is no need to do anything further. */
6668 if ((TREE_CODE (init
) != INTEGER_CST
6669 && TREE_CODE (init
) != REAL_CST
)
6670 || !initializer_zerop (init
))
6672 tree masktype
= truth_type_for (vectype
);
6673 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6675 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6678 kind
= whole_vector_shift_kind
;
6680 if (use_whole_vector
)
6682 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6683 use_whole_vector
->safe_grow_cleared (i
);
6684 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6685 use_whole_vector
->safe_push (kind
);
6693 /* Function check_scan_store.
6695 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6698 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6699 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6700 vect_memory_access_type memory_access_type
)
6702 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6703 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6706 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6709 || memory_access_type
!= VMAT_CONTIGUOUS
6710 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6711 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6712 || loop_vinfo
== NULL
6713 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6714 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6715 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6716 || !integer_zerop (DR_INIT (dr_info
->dr
))
6717 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6718 || !alias_sets_conflict_p (get_alias_set (vectype
),
6719 get_alias_set (TREE_TYPE (ref_type
))))
6721 if (dump_enabled_p ())
6722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6723 "unsupported OpenMP scan store.\n");
6727 /* We need to pattern match code built by OpenMP lowering and simplified
6728 by following optimizations into something we can handle.
6729 #pragma omp simd reduction(inscan,+:r)
6733 #pragma omp scan inclusive (r)
6736 shall have body with:
6737 // Initialization for input phase, store the reduction initializer:
6738 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6739 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6741 // Actual input phase:
6743 r.0_5 = D.2042[_20];
6746 // Initialization for scan phase:
6747 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6753 // Actual scan phase:
6755 r.1_8 = D.2042[_20];
6757 The "omp simd array" variable D.2042 holds the privatized copy used
6758 inside of the loop and D.2043 is another one that holds copies of
6759 the current original list item. The separate GOMP_SIMD_LANE ifn
6760 kinds are there in order to allow optimizing the initializer store
6761 and combiner sequence, e.g. if it is originally some C++ish user
6762 defined reduction, but allow the vectorizer to pattern recognize it
6763 and turn into the appropriate vectorized scan.
6765 For exclusive scan, this is slightly different:
6766 #pragma omp simd reduction(inscan,+:r)
6770 #pragma omp scan exclusive (r)
6773 shall have body with:
6774 // Initialization for input phase, store the reduction initializer:
6775 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6776 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6778 // Actual input phase:
6780 r.0_5 = D.2042[_20];
6783 // Initialization for scan phase:
6784 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6790 // Actual scan phase:
6792 r.1_8 = D.2044[_20];
6795 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6797 /* Match the D.2042[_21] = 0; store above. Just require that
6798 it is a constant or external definition store. */
6799 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6802 if (dump_enabled_p ())
6803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6804 "unsupported OpenMP scan initializer store.\n");
6808 if (! loop_vinfo
->scan_map
)
6809 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6810 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6811 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6814 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6816 /* These stores can be vectorized normally. */
6820 if (rhs_dt
!= vect_internal_def
)
6823 if (dump_enabled_p ())
6824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6825 "unsupported OpenMP scan combiner pattern.\n");
6829 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6830 tree rhs
= gimple_assign_rhs1 (stmt
);
6831 if (TREE_CODE (rhs
) != SSA_NAME
)
6834 gimple
*other_store_stmt
= NULL
;
6835 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6836 bool inscan_var_store
6837 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6839 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6841 if (!inscan_var_store
)
6843 use_operand_p use_p
;
6844 imm_use_iterator iter
;
6845 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6847 gimple
*use_stmt
= USE_STMT (use_p
);
6848 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6850 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6851 || !is_gimple_assign (use_stmt
)
6852 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6854 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6856 other_store_stmt
= use_stmt
;
6858 if (other_store_stmt
== NULL
)
6860 rhs
= gimple_assign_lhs (other_store_stmt
);
6861 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6865 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6867 use_operand_p use_p
;
6868 imm_use_iterator iter
;
6869 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6871 gimple
*use_stmt
= USE_STMT (use_p
);
6872 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6874 if (other_store_stmt
)
6876 other_store_stmt
= use_stmt
;
6882 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6883 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6884 || !is_gimple_assign (def_stmt
)
6885 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6888 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6889 /* For pointer addition, we should use the normal plus for the vector
6893 case POINTER_PLUS_EXPR
:
6896 case MULT_HIGHPART_EXPR
:
6901 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6904 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6905 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6906 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6909 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6910 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6911 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6912 || !gimple_assign_load_p (load1_stmt
)
6913 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6914 || !gimple_assign_load_p (load2_stmt
))
6917 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6918 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6919 if (load1_stmt_info
== NULL
6920 || load2_stmt_info
== NULL
6921 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6922 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6923 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6924 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6927 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6929 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6930 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6931 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6933 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6935 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6939 use_operand_p use_p
;
6940 imm_use_iterator iter
;
6941 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6943 gimple
*use_stmt
= USE_STMT (use_p
);
6944 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6946 if (other_store_stmt
)
6948 other_store_stmt
= use_stmt
;
6952 if (other_store_stmt
== NULL
)
6954 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6955 || !gimple_store_p (other_store_stmt
))
6958 stmt_vec_info other_store_stmt_info
6959 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6960 if (other_store_stmt_info
== NULL
6961 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6962 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6965 gimple
*stmt1
= stmt
;
6966 gimple
*stmt2
= other_store_stmt
;
6967 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6968 std::swap (stmt1
, stmt2
);
6969 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6970 gimple_assign_rhs1 (load2_stmt
)))
6972 std::swap (rhs1
, rhs2
);
6973 std::swap (load1_stmt
, load2_stmt
);
6974 std::swap (load1_stmt_info
, load2_stmt_info
);
6976 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6977 gimple_assign_rhs1 (load1_stmt
)))
6980 tree var3
= NULL_TREE
;
6981 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6982 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6983 gimple_assign_rhs1 (load2_stmt
)))
6985 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6987 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6988 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6989 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6991 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6992 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6993 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6994 || lookup_attribute ("omp simd inscan exclusive",
6995 DECL_ATTRIBUTES (var3
)))
6999 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7000 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7001 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7004 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7005 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7006 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7007 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7008 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7009 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7012 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7013 std::swap (var1
, var2
);
7015 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7017 if (!lookup_attribute ("omp simd inscan exclusive",
7018 DECL_ATTRIBUTES (var1
)))
7023 if (loop_vinfo
->scan_map
== NULL
)
7025 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7029 /* The IL is as expected, now check if we can actually vectorize it.
7036 should be vectorized as (where _40 is the vectorized rhs
7037 from the D.2042[_21] = 0; store):
7038 _30 = MEM <vector(8) int> [(int *)&D.2043];
7039 _31 = MEM <vector(8) int> [(int *)&D.2042];
7040 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7042 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7043 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7045 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7046 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7047 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7049 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7050 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7052 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7053 MEM <vector(8) int> [(int *)&D.2043] = _39;
7054 MEM <vector(8) int> [(int *)&D.2042] = _38;
7061 should be vectorized as (where _40 is the vectorized rhs
7062 from the D.2042[_21] = 0; store):
7063 _30 = MEM <vector(8) int> [(int *)&D.2043];
7064 _31 = MEM <vector(8) int> [(int *)&D.2042];
7065 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7066 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7068 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7069 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7070 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7072 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7073 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7074 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7076 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7077 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7080 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7081 MEM <vector(8) int> [(int *)&D.2044] = _39;
7082 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7083 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7084 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7085 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7088 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7089 if (units_log2
== -1)
7096 /* Function vectorizable_scan_store.
7098 Helper of vectorizable_score, arguments like on vectorizable_store.
7099 Handle only the transformation, checking is done in check_scan_store. */
7102 vectorizable_scan_store (vec_info
*vinfo
,
7103 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7104 stmt_vec_info
*vec_stmt
, int ncopies
)
7106 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7107 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7108 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7109 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7111 if (dump_enabled_p ())
7112 dump_printf_loc (MSG_NOTE
, vect_location
,
7113 "transform scan store. ncopies = %d\n", ncopies
);
7115 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7116 tree rhs
= gimple_assign_rhs1 (stmt
);
7117 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7119 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7120 bool inscan_var_store
7121 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7123 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7125 use_operand_p use_p
;
7126 imm_use_iterator iter
;
7127 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7129 gimple
*use_stmt
= USE_STMT (use_p
);
7130 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7132 rhs
= gimple_assign_lhs (use_stmt
);
7137 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7138 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7139 if (code
== POINTER_PLUS_EXPR
)
7141 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7142 && commutative_tree_code (code
));
7143 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7144 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7145 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7146 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7147 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7148 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7149 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7150 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7151 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7152 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7153 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7155 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7157 std::swap (rhs1
, rhs2
);
7158 std::swap (var1
, var2
);
7159 std::swap (load1_dr_info
, load2_dr_info
);
7162 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7165 unsigned HOST_WIDE_INT nunits
;
7166 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7168 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7169 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7170 gcc_assert (units_log2
> 0);
7171 auto_vec
<tree
, 16> perms
;
7172 perms
.quick_grow (units_log2
+ 1);
7173 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7174 for (int i
= 0; i
<= units_log2
; ++i
)
7176 unsigned HOST_WIDE_INT j
, k
;
7177 vec_perm_builder
sel (nunits
, nunits
, 1);
7178 sel
.quick_grow (nunits
);
7179 if (i
== units_log2
)
7180 for (j
= 0; j
< nunits
; ++j
)
7181 sel
[j
] = nunits
- 1;
7184 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7186 for (k
= 0; j
< nunits
; ++j
, ++k
)
7187 sel
[j
] = nunits
+ k
;
7189 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7190 if (!use_whole_vector
.is_empty ()
7191 && use_whole_vector
[i
] != scan_store_kind_perm
)
7193 if (zero_vec
== NULL_TREE
)
7194 zero_vec
= build_zero_cst (vectype
);
7195 if (masktype
== NULL_TREE
7196 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7197 masktype
= truth_type_for (vectype
);
7198 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7201 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7204 stmt_vec_info prev_stmt_info
= NULL
;
7205 tree vec_oprnd1
= NULL_TREE
;
7206 tree vec_oprnd2
= NULL_TREE
;
7207 tree vec_oprnd3
= NULL_TREE
;
7208 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7209 tree dataref_offset
= build_int_cst (ref_type
, 0);
7210 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7211 vectype
, VMAT_CONTIGUOUS
);
7212 tree ldataref_ptr
= NULL_TREE
;
7213 tree orig
= NULL_TREE
;
7214 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7215 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7216 for (int j
= 0; j
< ncopies
; j
++)
7218 stmt_vec_info new_stmt_info
;
7221 vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
, *init
, stmt_info
);
7222 if (ldataref_ptr
== NULL
)
7223 vec_oprnd2
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
);
7224 vec_oprnd3
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
);
7229 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7230 if (ldataref_ptr
== NULL
)
7231 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7232 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7233 if (!inscan_var_store
)
7234 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7239 vec_oprnd2
= make_ssa_name (vectype
);
7240 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7241 unshare_expr (ldataref_ptr
),
7243 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7244 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7245 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7246 if (prev_stmt_info
== NULL
)
7247 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7249 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7250 prev_stmt_info
= new_stmt_info
;
7253 tree v
= vec_oprnd2
;
7254 for (int i
= 0; i
< units_log2
; ++i
)
7256 tree new_temp
= make_ssa_name (vectype
);
7257 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7259 && (use_whole_vector
[i
]
7260 != scan_store_kind_perm
))
7261 ? zero_vec
: vec_oprnd1
, v
,
7263 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7264 if (prev_stmt_info
== NULL
)
7265 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7267 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7268 prev_stmt_info
= new_stmt_info
;
7270 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7272 /* Whole vector shift shifted in zero bits, but if *init
7273 is not initializer_zerop, we need to replace those elements
7274 with elements from vec_oprnd1. */
7275 tree_vector_builder
vb (masktype
, nunits
, 1);
7276 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7277 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7278 ? boolean_false_node
: boolean_true_node
);
7280 tree new_temp2
= make_ssa_name (vectype
);
7281 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7282 new_temp
, vec_oprnd1
);
7283 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
7285 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7286 prev_stmt_info
= new_stmt_info
;
7287 new_temp
= new_temp2
;
7290 /* For exclusive scan, perform the perms[i] permutation once
7293 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7301 tree new_temp2
= make_ssa_name (vectype
);
7302 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7303 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7304 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7305 prev_stmt_info
= new_stmt_info
;
7310 tree new_temp
= make_ssa_name (vectype
);
7311 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7312 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7313 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7314 prev_stmt_info
= new_stmt_info
;
7316 tree last_perm_arg
= new_temp
;
7317 /* For exclusive scan, new_temp computed above is the exclusive scan
7318 prefix sum. Turn it into inclusive prefix sum for the broadcast
7319 of the last element into orig. */
7320 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7322 last_perm_arg
= make_ssa_name (vectype
);
7323 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7324 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7325 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7326 prev_stmt_info
= new_stmt_info
;
7329 orig
= make_ssa_name (vectype
);
7330 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7331 last_perm_arg
, perms
[units_log2
]);
7332 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7333 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7334 prev_stmt_info
= new_stmt_info
;
7336 if (!inscan_var_store
)
7338 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7339 unshare_expr (dataref_ptr
),
7341 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7342 g
= gimple_build_assign (data_ref
, new_temp
);
7343 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7344 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7345 prev_stmt_info
= new_stmt_info
;
7349 if (inscan_var_store
)
7350 for (int j
= 0; j
< ncopies
; j
++)
7353 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7355 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7356 unshare_expr (dataref_ptr
),
7358 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7359 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7360 stmt_vec_info new_stmt_info
7361 = vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7362 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7363 prev_stmt_info
= new_stmt_info
;
7369 /* Function vectorizable_store.
7371 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7372 that can be vectorized.
7373 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7374 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7375 Return true if STMT_INFO is vectorizable in this way. */
7378 vectorizable_store (vec_info
*vinfo
,
7379 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7380 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7381 stmt_vector_for_cost
*cost_vec
)
7385 tree vec_oprnd
= NULL_TREE
;
7387 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7388 class loop
*loop
= NULL
;
7389 machine_mode vec_mode
;
7391 enum dr_alignment_support alignment_support_scheme
;
7392 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7393 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7394 stmt_vec_info prev_stmt_info
= NULL
;
7395 tree dataref_ptr
= NULL_TREE
;
7396 tree dataref_offset
= NULL_TREE
;
7397 gimple
*ptr_incr
= NULL
;
7400 stmt_vec_info first_stmt_info
;
7402 unsigned int group_size
, i
;
7403 vec
<tree
> oprnds
= vNULL
;
7404 vec
<tree
> result_chain
= vNULL
;
7405 tree offset
= NULL_TREE
;
7406 vec
<tree
> vec_oprnds
= vNULL
;
7407 bool slp
= (slp_node
!= NULL
);
7408 unsigned int vec_num
;
7409 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7411 gather_scatter_info gs_info
;
7413 vec_load_store_type vls_type
;
7416 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7419 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7423 /* Is vectorizable store? */
7425 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7426 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7428 tree scalar_dest
= gimple_assign_lhs (assign
);
7429 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7430 && is_pattern_stmt_p (stmt_info
))
7431 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7432 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7433 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7434 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7435 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7436 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7437 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7438 && TREE_CODE (scalar_dest
) != MEM_REF
)
7443 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7444 if (!call
|| !gimple_call_internal_p (call
))
7447 internal_fn ifn
= gimple_call_internal_fn (call
);
7448 if (!internal_store_fn_p (ifn
))
7451 if (slp_node
!= NULL
)
7453 if (dump_enabled_p ())
7454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7455 "SLP of masked stores not supported.\n");
7459 int mask_index
= internal_fn_mask_index (ifn
);
7460 if (mask_index
>= 0)
7462 mask
= gimple_call_arg (call
, mask_index
);
7463 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
7469 op
= vect_get_store_rhs (stmt_info
);
7471 /* Cannot have hybrid store SLP -- that would mean storing to the
7472 same location twice. */
7473 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7475 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7476 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7480 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7481 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7486 /* Multiple types in SLP are handled by creating the appropriate number of
7487 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7492 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7494 gcc_assert (ncopies
>= 1);
7496 /* FORNOW. This restriction should be relaxed. */
7497 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7499 if (dump_enabled_p ())
7500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7501 "multiple types in nested loop.\n");
7505 if (!vect_check_store_rhs (vinfo
, stmt_info
,
7506 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7509 elem_type
= TREE_TYPE (vectype
);
7510 vec_mode
= TYPE_MODE (vectype
);
7512 if (!STMT_VINFO_DATA_REF (stmt_info
))
7515 vect_memory_access_type memory_access_type
;
7516 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, vls_type
,
7517 ncopies
, &memory_access_type
, &gs_info
))
7522 if (memory_access_type
== VMAT_CONTIGUOUS
)
7524 if (!VECTOR_MODE_P (vec_mode
)
7525 || !can_vec_mask_load_store_p (vec_mode
,
7526 TYPE_MODE (mask_vectype
), false))
7529 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7530 && (memory_access_type
!= VMAT_GATHER_SCATTER
7531 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7533 if (dump_enabled_p ())
7534 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7535 "unsupported access type for masked store.\n");
7541 /* FORNOW. In some cases can vectorize even if data-type not supported
7542 (e.g. - array initialization with 0). */
7543 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7547 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7548 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7549 && memory_access_type
!= VMAT_GATHER_SCATTER
7550 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7553 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7554 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7555 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7559 first_stmt_info
= stmt_info
;
7560 first_dr_info
= dr_info
;
7561 group_size
= vec_num
= 1;
7564 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7566 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7567 memory_access_type
))
7571 if (!vec_stmt
) /* transformation not required. */
7573 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7576 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7577 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7578 memory_access_type
, &gs_info
, mask
);
7580 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7581 vect_model_store_cost (vinfo
, stmt_info
, ncopies
, rhs_dt
,
7582 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7585 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7589 ensure_base_align (dr_info
);
7591 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7593 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7594 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7595 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7596 tree ptr
, var
, scale
, vec_mask
;
7597 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7598 tree mask_halfvectype
= mask_vectype
;
7599 edge pe
= loop_preheader_edge (loop
);
7602 enum { NARROW
, NONE
, WIDEN
} modifier
;
7603 poly_uint64 scatter_off_nunits
7604 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7606 if (known_eq (nunits
, scatter_off_nunits
))
7608 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7612 /* Currently gathers and scatters are only supported for
7613 fixed-length vectors. */
7614 unsigned int count
= scatter_off_nunits
.to_constant ();
7615 vec_perm_builder
sel (count
, count
, 1);
7616 for (i
= 0; i
< (unsigned int) count
; ++i
)
7617 sel
.quick_push (i
| (count
/ 2));
7619 vec_perm_indices
indices (sel
, 1, count
);
7620 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7622 gcc_assert (perm_mask
!= NULL_TREE
);
7624 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7628 /* Currently gathers and scatters are only supported for
7629 fixed-length vectors. */
7630 unsigned int count
= nunits
.to_constant ();
7631 vec_perm_builder
sel (count
, count
, 1);
7632 for (i
= 0; i
< (unsigned int) count
; ++i
)
7633 sel
.quick_push (i
| (count
/ 2));
7635 vec_perm_indices
indices (sel
, 2, count
);
7636 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7637 gcc_assert (perm_mask
!= NULL_TREE
);
7641 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7646 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7647 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7648 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7649 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7650 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7651 scaletype
= TREE_VALUE (arglist
);
7653 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7654 && TREE_CODE (rettype
) == VOID_TYPE
);
7656 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7657 if (!is_gimple_min_invariant (ptr
))
7659 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7660 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7661 gcc_assert (!new_bb
);
7664 if (mask
== NULL_TREE
)
7666 mask_arg
= build_int_cst (masktype
, -1);
7667 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7668 mask_arg
, masktype
, NULL
);
7671 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7673 prev_stmt_info
= NULL
;
7674 for (j
= 0; j
< ncopies
; ++j
)
7678 src
= vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
,
7680 op
= vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
7684 mask_op
= vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
,
7687 else if (modifier
!= NONE
&& (j
& 1))
7689 if (modifier
== WIDEN
)
7692 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7694 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7695 perm_mask
, stmt_info
, gsi
);
7698 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7701 else if (modifier
== NARROW
)
7703 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7704 perm_mask
, stmt_info
, gsi
);
7705 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7713 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7715 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7718 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7722 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7724 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7725 TYPE_VECTOR_SUBPARTS (srctype
)));
7726 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7727 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7729 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7730 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7734 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7736 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7737 TYPE_VECTOR_SUBPARTS (idxtype
)));
7738 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7739 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7741 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7742 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7750 if (modifier
== NARROW
)
7752 var
= vect_get_new_ssa_name (mask_halfvectype
,
7755 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7756 : VEC_UNPACK_LO_EXPR
,
7758 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7761 tree optype
= TREE_TYPE (mask_arg
);
7762 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7765 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7766 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7767 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7769 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7770 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7772 if (!useless_type_conversion_p (masktype
, utype
))
7774 gcc_assert (TYPE_PRECISION (utype
)
7775 <= TYPE_PRECISION (masktype
));
7776 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7777 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7778 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7784 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7785 stmt_vec_info new_stmt_info
7786 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7788 if (prev_stmt_info
== NULL
)
7789 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7791 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7792 prev_stmt_info
= new_stmt_info
;
7796 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7797 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7799 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7800 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7805 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7807 /* We vectorize all the stmts of the interleaving group when we
7808 reach the last stmt in the group. */
7809 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7810 < DR_GROUP_SIZE (first_stmt_info
)
7819 grouped_store
= false;
7820 /* VEC_NUM is the number of vect stmts to be created for this
7822 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7823 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7824 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7825 == first_stmt_info
);
7826 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7827 op
= vect_get_store_rhs (first_stmt_info
);
7830 /* VEC_NUM is the number of vect stmts to be created for this
7832 vec_num
= group_size
;
7834 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7837 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7839 if (dump_enabled_p ())
7840 dump_printf_loc (MSG_NOTE
, vect_location
,
7841 "transform store. ncopies = %d\n", ncopies
);
7843 if (memory_access_type
== VMAT_ELEMENTWISE
7844 || memory_access_type
== VMAT_STRIDED_SLP
)
7846 gimple_stmt_iterator incr_gsi
;
7852 tree stride_base
, stride_step
, alias_off
;
7856 /* Checked by get_load_store_type. */
7857 unsigned int const_nunits
= nunits
.to_constant ();
7859 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7860 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7862 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7864 = fold_build_pointer_plus
7865 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7866 size_binop (PLUS_EXPR
,
7867 convert_to_ptrofftype (dr_offset
),
7868 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7869 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7871 /* For a store with loop-invariant (but other than power-of-2)
7872 stride (i.e. not a grouped access) like so:
7874 for (i = 0; i < n; i += stride)
7877 we generate a new induction variable and new stores from
7878 the components of the (vectorized) rhs:
7880 for (j = 0; ; j += VF*stride)
7885 array[j + stride] = tmp2;
7889 unsigned nstores
= const_nunits
;
7891 tree ltype
= elem_type
;
7892 tree lvectype
= vectype
;
7895 if (group_size
< const_nunits
7896 && const_nunits
% group_size
== 0)
7898 nstores
= const_nunits
/ group_size
;
7900 ltype
= build_vector_type (elem_type
, group_size
);
7903 /* First check if vec_extract optab doesn't support extraction
7904 of vector elts directly. */
7905 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7907 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7908 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7909 group_size
).exists (&vmode
)
7910 || (convert_optab_handler (vec_extract_optab
,
7911 TYPE_MODE (vectype
), vmode
)
7912 == CODE_FOR_nothing
))
7914 /* Try to avoid emitting an extract of vector elements
7915 by performing the extracts using an integer type of the
7916 same size, extracting from a vector of those and then
7917 re-interpreting it as the original vector type if
7920 = group_size
* GET_MODE_BITSIZE (elmode
);
7921 unsigned int lnunits
= const_nunits
/ group_size
;
7922 /* If we can't construct such a vector fall back to
7923 element extracts from the original vector type and
7924 element size stores. */
7925 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7926 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7927 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7928 lnunits
).exists (&vmode
)
7929 && (convert_optab_handler (vec_extract_optab
,
7931 != CODE_FOR_nothing
))
7935 ltype
= build_nonstandard_integer_type (lsize
, 1);
7936 lvectype
= build_vector_type (ltype
, nstores
);
7938 /* Else fall back to vector extraction anyway.
7939 Fewer stores are more important than avoiding spilling
7940 of the vector we extract from. Compared to the
7941 construction case in vectorizable_load no store-forwarding
7942 issue exists here for reasonable archs. */
7945 else if (group_size
>= const_nunits
7946 && group_size
% const_nunits
== 0)
7949 lnel
= const_nunits
;
7953 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7954 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7957 ivstep
= stride_step
;
7958 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7959 build_int_cst (TREE_TYPE (ivstep
), vf
));
7961 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7963 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7964 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7965 create_iv (stride_base
, ivstep
, NULL
,
7966 loop
, &incr_gsi
, insert_after
,
7968 incr
= gsi_stmt (incr_gsi
);
7969 loop_vinfo
->add_stmt (incr
);
7971 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7973 prev_stmt_info
= NULL
;
7974 alias_off
= build_int_cst (ref_type
, 0);
7975 stmt_vec_info next_stmt_info
= first_stmt_info
;
7976 for (g
= 0; g
< group_size
; g
++)
7978 running_off
= offvar
;
7981 tree size
= TYPE_SIZE_UNIT (ltype
);
7982 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7984 tree newoff
= copy_ssa_name (running_off
, NULL
);
7985 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7987 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7988 running_off
= newoff
;
7990 unsigned int group_el
= 0;
7991 unsigned HOST_WIDE_INT
7992 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7993 for (j
= 0; j
< ncopies
; j
++)
7995 /* We've set op and dt above, from vect_get_store_rhs,
7996 and first_stmt_info == stmt_info. */
8001 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
,
8002 &vec_oprnds
, NULL
, slp_node
);
8003 vec_oprnd
= vec_oprnds
[0];
8007 op
= vect_get_store_rhs (next_stmt_info
);
8008 vec_oprnd
= vect_get_vec_def_for_operand
8009 (vinfo
, op
, next_stmt_info
);
8015 vec_oprnd
= vec_oprnds
[j
];
8017 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
8020 /* Pun the vector to extract from if necessary. */
8021 if (lvectype
!= vectype
)
8023 tree tem
= make_ssa_name (lvectype
);
8025 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8026 lvectype
, vec_oprnd
));
8027 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8030 for (i
= 0; i
< nstores
; i
++)
8032 tree newref
, newoff
;
8033 gimple
*incr
, *assign
;
8034 tree size
= TYPE_SIZE (ltype
);
8035 /* Extract the i'th component. */
8036 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8037 bitsize_int (i
), size
);
8038 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8041 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8045 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8047 newref
= build2 (MEM_REF
, ltype
,
8048 running_off
, this_off
);
8049 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8051 /* And store it to *running_off. */
8052 assign
= gimple_build_assign (newref
, elem
);
8053 stmt_vec_info assign_info
8054 = vect_finish_stmt_generation (vinfo
, stmt_info
,
8059 || group_el
== group_size
)
8061 newoff
= copy_ssa_name (running_off
, NULL
);
8062 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8063 running_off
, stride_step
);
8064 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8066 running_off
= newoff
;
8069 if (g
== group_size
- 1
8072 if (j
== 0 && i
== 0)
8073 STMT_VINFO_VEC_STMT (stmt_info
)
8074 = *vec_stmt
= assign_info
;
8076 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
8077 prev_stmt_info
= assign_info
;
8081 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8086 vec_oprnds
.release ();
8090 auto_vec
<tree
> dr_chain (group_size
);
8091 oprnds
.create (group_size
);
8093 /* Gather-scatter accesses perform only component accesses, alignment
8094 is irrelevant for them. */
8095 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8096 alignment_support_scheme
= dr_unaligned_supported
;
8098 alignment_support_scheme
8099 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
8101 gcc_assert (alignment_support_scheme
);
8102 vec_loop_masks
*loop_masks
8103 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8104 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8106 /* Targets with store-lane instructions must not require explicit
8107 realignment. vect_supportable_dr_alignment always returns either
8108 dr_aligned or dr_unaligned_supported for masked operations. */
8109 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8112 || alignment_support_scheme
== dr_aligned
8113 || alignment_support_scheme
== dr_unaligned_supported
);
8115 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
8116 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8117 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8120 tree vec_offset
= NULL_TREE
;
8121 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8123 aggr_type
= NULL_TREE
;
8126 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8128 aggr_type
= elem_type
;
8129 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8130 &bump
, &vec_offset
);
8134 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8135 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8137 aggr_type
= vectype
;
8138 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8139 memory_access_type
);
8143 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8145 /* In case the vectorization factor (VF) is bigger than the number
8146 of elements that we can fit in a vectype (nunits), we have to generate
8147 more than one vector stmt - i.e - we need to "unroll" the
8148 vector stmt by a factor VF/nunits. For more details see documentation in
8149 vect_get_vec_def_for_copy_stmt. */
8151 /* In case of interleaving (non-unit grouped access):
8158 We create vectorized stores starting from base address (the access of the
8159 first stmt in the chain (S2 in the above example), when the last store stmt
8160 of the chain (S4) is reached:
8163 VS2: &base + vec_size*1 = vx0
8164 VS3: &base + vec_size*2 = vx1
8165 VS4: &base + vec_size*3 = vx3
8167 Then permutation statements are generated:
8169 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8170 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8173 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8174 (the order of the data-refs in the output of vect_permute_store_chain
8175 corresponds to the order of scalar stmts in the interleaving chain - see
8176 the documentation of vect_permute_store_chain()).
8178 In case of both multiple types and interleaving, above vector stores and
8179 permutation stmts are created for every copy. The result vector stmts are
8180 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8181 STMT_VINFO_RELATED_STMT for the next copies.
8184 prev_stmt_info
= NULL
;
8185 tree vec_mask
= NULL_TREE
;
8186 for (j
= 0; j
< ncopies
; j
++)
8188 stmt_vec_info new_stmt_info
;
8193 /* Get vectorized arguments for SLP_NODE. */
8194 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8197 vec_oprnd
= vec_oprnds
[0];
8201 /* For interleaved stores we collect vectorized defs for all the
8202 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8203 used as an input to vect_permute_store_chain(), and OPRNDS as
8204 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8206 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8207 OPRNDS are of size 1. */
8208 stmt_vec_info next_stmt_info
= first_stmt_info
;
8209 for (i
= 0; i
< group_size
; i
++)
8211 /* Since gaps are not supported for interleaved stores,
8212 DR_GROUP_SIZE is the exact number of stmts in the chain.
8213 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8214 that there is no interleaving, DR_GROUP_SIZE is 1,
8215 and only one iteration of the loop will be executed. */
8216 op
= vect_get_store_rhs (next_stmt_info
);
8217 vec_oprnd
= vect_get_vec_def_for_operand
8218 (vinfo
, op
, next_stmt_info
);
8219 dr_chain
.quick_push (vec_oprnd
);
8220 oprnds
.quick_push (vec_oprnd
);
8221 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8224 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
8228 /* We should have catched mismatched types earlier. */
8229 gcc_assert (useless_type_conversion_p (vectype
,
8230 TREE_TYPE (vec_oprnd
)));
8231 bool simd_lane_access_p
8232 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8233 if (simd_lane_access_p
8235 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8236 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8237 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8238 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8239 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8240 get_alias_set (TREE_TYPE (ref_type
))))
8242 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8243 dataref_offset
= build_int_cst (ref_type
, 0);
8245 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8246 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
8247 &dataref_ptr
, &vec_offset
);
8250 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8251 simd_lane_access_p
? loop
: NULL
,
8252 offset
, &dummy
, gsi
, &ptr_incr
,
8253 simd_lane_access_p
, NULL_TREE
, bump
);
8257 /* For interleaved stores we created vectorized defs for all the
8258 defs stored in OPRNDS in the previous iteration (previous copy).
8259 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8260 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8262 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8263 OPRNDS are of size 1. */
8264 for (i
= 0; i
< group_size
; i
++)
8267 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8268 dr_chain
[i
] = vec_oprnd
;
8269 oprnds
[i
] = vec_oprnd
;
8272 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8275 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8276 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8277 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8279 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8283 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8287 /* Get an array into which we can store the individual vectors. */
8288 vec_array
= create_vector_array (vectype
, vec_num
);
8290 /* Invalidate the current contents of VEC_ARRAY. This should
8291 become an RTL clobber too, which prevents the vector registers
8292 from being upward-exposed. */
8293 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8295 /* Store the individual vectors into the array. */
8296 for (i
= 0; i
< vec_num
; i
++)
8298 vec_oprnd
= dr_chain
[i
];
8299 write_vector_array (vinfo
, stmt_info
,
8300 gsi
, vec_oprnd
, vec_array
, i
);
8303 tree final_mask
= NULL
;
8305 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8308 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8315 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8317 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8318 tree alias_ptr
= build_int_cst (ref_type
, align
);
8319 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8320 dataref_ptr
, alias_ptr
,
8321 final_mask
, vec_array
);
8326 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8327 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8328 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8330 gimple_call_set_lhs (call
, data_ref
);
8332 gimple_call_set_nothrow (call
, true);
8333 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
8336 /* Record that VEC_ARRAY is now dead. */
8337 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8341 new_stmt_info
= NULL
;
8345 result_chain
.create (group_size
);
8347 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8348 gsi
, &result_chain
);
8351 stmt_vec_info next_stmt_info
= first_stmt_info
;
8352 for (i
= 0; i
< vec_num
; i
++)
8355 unsigned HOST_WIDE_INT align
;
8357 tree final_mask
= NULL_TREE
;
8359 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8361 vectype
, vec_num
* j
+ i
);
8363 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8366 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8368 tree scale
= size_int (gs_info
.scale
);
8371 call
= gimple_build_call_internal
8372 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8373 scale
, vec_oprnd
, final_mask
);
8375 call
= gimple_build_call_internal
8376 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8378 gimple_call_set_nothrow (call
, true);
8380 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8385 /* Bump the vector pointer. */
8386 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8387 gsi
, stmt_info
, bump
);
8390 vec_oprnd
= vec_oprnds
[i
];
8391 else if (grouped_store
)
8392 /* For grouped stores vectorized defs are interleaved in
8393 vect_permute_store_chain(). */
8394 vec_oprnd
= result_chain
[i
];
8396 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8397 if (aligned_access_p (first_dr_info
))
8399 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8401 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8405 misalign
= DR_MISALIGNMENT (first_dr_info
);
8406 if (dataref_offset
== NULL_TREE
8407 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8408 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8411 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8413 tree perm_mask
= perm_mask_for_reverse (vectype
);
8414 tree perm_dest
= vect_create_destination_var
8415 (vect_get_store_rhs (stmt_info
), vectype
);
8416 tree new_temp
= make_ssa_name (perm_dest
);
8418 /* Generate the permute statement. */
8420 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8421 vec_oprnd
, perm_mask
);
8422 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8424 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8425 vec_oprnd
= new_temp
;
8428 /* Arguments are ready. Create the new vector stmt. */
8431 align
= least_bit_hwi (misalign
| align
);
8432 tree ptr
= build_int_cst (ref_type
, align
);
8434 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8436 final_mask
, vec_oprnd
);
8437 gimple_call_set_nothrow (call
, true);
8439 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8443 data_ref
= fold_build2 (MEM_REF
, vectype
,
8447 : build_int_cst (ref_type
, 0));
8448 if (aligned_access_p (first_dr_info
))
8450 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8451 TREE_TYPE (data_ref
)
8452 = build_aligned_type (TREE_TYPE (data_ref
),
8453 align
* BITS_PER_UNIT
);
8455 TREE_TYPE (data_ref
)
8456 = build_aligned_type (TREE_TYPE (data_ref
),
8457 TYPE_ALIGN (elem_type
));
8458 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8460 = gimple_build_assign (data_ref
, vec_oprnd
);
8462 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8468 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8469 if (!next_stmt_info
)
8476 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8478 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8479 prev_stmt_info
= new_stmt_info
;
8484 result_chain
.release ();
8485 vec_oprnds
.release ();
8490 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8491 VECTOR_CST mask. No checks are made that the target platform supports the
8492 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8493 vect_gen_perm_mask_checked. */
8496 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8500 poly_uint64 nunits
= sel
.length ();
8501 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8503 mask_type
= build_vector_type (ssizetype
, nunits
);
8504 return vec_perm_indices_to_tree (mask_type
, sel
);
8507 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8508 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8511 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8513 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8514 return vect_gen_perm_mask_any (vectype
, sel
);
8517 /* Given a vector variable X and Y, that was generated for the scalar
8518 STMT_INFO, generate instructions to permute the vector elements of X and Y
8519 using permutation mask MASK_VEC, insert them at *GSI and return the
8520 permuted vector variable. */
8523 permute_vec_elements (vec_info
*vinfo
,
8524 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8525 gimple_stmt_iterator
*gsi
)
8527 tree vectype
= TREE_TYPE (x
);
8528 tree perm_dest
, data_ref
;
8531 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8532 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8533 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8535 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8536 data_ref
= make_ssa_name (perm_dest
);
8538 /* Generate the permute statement. */
8539 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8540 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8545 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8546 inserting them on the loops preheader edge. Returns true if we
8547 were successful in doing so (and thus STMT_INFO can be moved then),
8548 otherwise returns false. */
8551 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8557 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8559 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8560 if (!gimple_nop_p (def_stmt
)
8561 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8563 /* Make sure we don't need to recurse. While we could do
8564 so in simple cases when there are more complex use webs
8565 we don't have an easy way to preserve stmt order to fulfil
8566 dependencies within them. */
8569 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8571 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8573 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8574 if (!gimple_nop_p (def_stmt2
)
8575 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8585 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8587 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8588 if (!gimple_nop_p (def_stmt
)
8589 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8591 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8592 gsi_remove (&gsi
, false);
8593 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8600 /* vectorizable_load.
8602 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8603 that can be vectorized.
8604 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8605 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8606 Return true if STMT_INFO is vectorizable in this way. */
8609 vectorizable_load (vec_info
*vinfo
,
8610 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8611 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8612 slp_instance slp_node_instance
,
8613 stmt_vector_for_cost
*cost_vec
)
8616 tree vec_dest
= NULL
;
8617 tree data_ref
= NULL
;
8618 stmt_vec_info prev_stmt_info
;
8619 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8620 class loop
*loop
= NULL
;
8621 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8622 bool nested_in_vect_loop
= false;
8627 enum dr_alignment_support alignment_support_scheme
;
8628 tree dataref_ptr
= NULL_TREE
;
8629 tree dataref_offset
= NULL_TREE
;
8630 gimple
*ptr_incr
= NULL
;
8633 unsigned int group_size
;
8634 poly_uint64 group_gap_adj
;
8635 tree msq
= NULL_TREE
, lsq
;
8636 tree offset
= NULL_TREE
;
8637 tree byte_offset
= NULL_TREE
;
8638 tree realignment_token
= NULL_TREE
;
8640 vec
<tree
> dr_chain
= vNULL
;
8641 bool grouped_load
= false;
8642 stmt_vec_info first_stmt_info
;
8643 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8644 bool compute_in_loop
= false;
8645 class loop
*at_loop
;
8647 bool slp
= (slp_node
!= NULL
);
8648 bool slp_perm
= false;
8649 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8652 gather_scatter_info gs_info
;
8654 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8656 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8659 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8663 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8664 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8666 scalar_dest
= gimple_assign_lhs (assign
);
8667 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8670 tree_code code
= gimple_assign_rhs_code (assign
);
8671 if (code
!= ARRAY_REF
8672 && code
!= BIT_FIELD_REF
8673 && code
!= INDIRECT_REF
8674 && code
!= COMPONENT_REF
8675 && code
!= IMAGPART_EXPR
8676 && code
!= REALPART_EXPR
8678 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8683 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8684 if (!call
|| !gimple_call_internal_p (call
))
8687 internal_fn ifn
= gimple_call_internal_fn (call
);
8688 if (!internal_load_fn_p (ifn
))
8691 scalar_dest
= gimple_call_lhs (call
);
8695 int mask_index
= internal_fn_mask_index (ifn
);
8696 if (mask_index
>= 0)
8698 mask
= gimple_call_arg (call
, mask_index
);
8699 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
8705 if (!STMT_VINFO_DATA_REF (stmt_info
))
8708 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8709 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8713 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8714 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8715 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8720 /* Multiple types in SLP are handled by creating the appropriate number of
8721 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8726 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8728 gcc_assert (ncopies
>= 1);
8730 /* FORNOW. This restriction should be relaxed. */
8731 if (nested_in_vect_loop
&& ncopies
> 1)
8733 if (dump_enabled_p ())
8734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8735 "multiple types in nested loop.\n");
8739 /* Invalidate assumptions made by dependence analysis when vectorization
8740 on the unrolled body effectively re-orders stmts. */
8742 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8743 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8744 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8746 if (dump_enabled_p ())
8747 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8748 "cannot perform implicit CSE when unrolling "
8749 "with negative dependence distance\n");
8753 elem_type
= TREE_TYPE (vectype
);
8754 mode
= TYPE_MODE (vectype
);
8756 /* FORNOW. In some cases can vectorize even if data-type not supported
8757 (e.g. - data copies). */
8758 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8760 if (dump_enabled_p ())
8761 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8762 "Aligned load, but unsupported type.\n");
8766 /* Check if the load is a part of an interleaving chain. */
8767 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8769 grouped_load
= true;
8771 gcc_assert (!nested_in_vect_loop
);
8772 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8774 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8775 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8777 /* Refuse non-SLP vectorization of SLP-only groups. */
8778 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8780 if (dump_enabled_p ())
8781 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8782 "cannot vectorize load in non-SLP mode.\n");
8786 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8789 /* Invalidate assumptions made by dependence analysis when vectorization
8790 on the unrolled body effectively re-orders stmts. */
8791 if (!PURE_SLP_STMT (stmt_info
)
8792 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8793 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8794 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8796 if (dump_enabled_p ())
8797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8798 "cannot perform implicit CSE when performing "
8799 "group loads with negative dependence distance\n");
8806 vect_memory_access_type memory_access_type
;
8807 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, VLS_LOAD
,
8808 ncopies
, &memory_access_type
, &gs_info
))
8813 if (memory_access_type
== VMAT_CONTIGUOUS
)
8815 machine_mode vec_mode
= TYPE_MODE (vectype
);
8816 if (!VECTOR_MODE_P (vec_mode
)
8817 || !can_vec_mask_load_store_p (vec_mode
,
8818 TYPE_MODE (mask_vectype
), true))
8821 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8822 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8824 if (dump_enabled_p ())
8825 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8826 "unsupported access type for masked load.\n");
8831 if (!vec_stmt
) /* transformation not required. */
8834 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8837 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8838 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8839 memory_access_type
, &gs_info
, mask
);
8841 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8842 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, memory_access_type
,
8843 slp_node_instance
, slp_node
, cost_vec
);
8848 gcc_assert (memory_access_type
8849 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8851 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_NOTE
, vect_location
,
8853 "transform load. ncopies = %d\n", ncopies
);
8857 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8858 ensure_base_align (dr_info
);
8860 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8862 vect_build_gather_load_calls (vinfo
,
8863 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8867 if (memory_access_type
== VMAT_INVARIANT
)
8869 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8870 /* If we have versioned for aliasing or the loop doesn't
8871 have any data dependencies that would preclude this,
8872 then we are sure this is a loop invariant load and
8873 thus we can insert it on the preheader edge. */
8874 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8875 && !nested_in_vect_loop
8876 && hoist_defs_of_uses (stmt_info
, loop
));
8879 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8880 if (dump_enabled_p ())
8881 dump_printf_loc (MSG_NOTE
, vect_location
,
8882 "hoisting out of the vectorized loop: %G", stmt
);
8883 scalar_dest
= copy_ssa_name (scalar_dest
);
8884 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8885 gsi_insert_on_edge_immediate
8886 (loop_preheader_edge (loop
),
8887 gimple_build_assign (scalar_dest
, rhs
));
8889 /* These copies are all equivalent, but currently the representation
8890 requires a separate STMT_VINFO_VEC_STMT for each one. */
8891 prev_stmt_info
= NULL
;
8892 gimple_stmt_iterator gsi2
= *gsi
;
8894 for (j
= 0; j
< ncopies
; j
++)
8896 stmt_vec_info new_stmt_info
;
8899 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8901 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8902 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8906 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8908 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8911 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8913 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8915 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8916 prev_stmt_info
= new_stmt_info
;
8921 if (memory_access_type
== VMAT_ELEMENTWISE
8922 || memory_access_type
== VMAT_STRIDED_SLP
)
8924 gimple_stmt_iterator incr_gsi
;
8930 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8931 tree stride_base
, stride_step
, alias_off
;
8932 /* Checked by get_load_store_type. */
8933 unsigned int const_nunits
= nunits
.to_constant ();
8934 unsigned HOST_WIDE_INT cst_offset
= 0;
8937 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8938 gcc_assert (!nested_in_vect_loop
);
8942 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8943 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8947 first_stmt_info
= stmt_info
;
8948 first_dr_info
= dr_info
;
8950 if (slp
&& grouped_load
)
8952 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8953 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8959 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8960 * vect_get_place_in_interleaving_chain (stmt_info
,
8963 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8966 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8968 = fold_build_pointer_plus
8969 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8970 size_binop (PLUS_EXPR
,
8971 convert_to_ptrofftype (dr_offset
),
8972 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8973 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8975 /* For a load with loop-invariant (but other than power-of-2)
8976 stride (i.e. not a grouped access) like so:
8978 for (i = 0; i < n; i += stride)
8981 we generate a new induction variable and new accesses to
8982 form a new vector (or vectors, depending on ncopies):
8984 for (j = 0; ; j += VF*stride)
8986 tmp2 = array[j + stride];
8988 vectemp = {tmp1, tmp2, ...}
8991 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8992 build_int_cst (TREE_TYPE (stride_step
), vf
));
8994 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8996 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8997 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8998 create_iv (stride_base
, ivstep
, NULL
,
8999 loop
, &incr_gsi
, insert_after
,
9001 incr
= gsi_stmt (incr_gsi
);
9002 loop_vinfo
->add_stmt (incr
);
9004 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9006 prev_stmt_info
= NULL
;
9007 running_off
= offvar
;
9008 alias_off
= build_int_cst (ref_type
, 0);
9009 int nloads
= const_nunits
;
9011 tree ltype
= TREE_TYPE (vectype
);
9012 tree lvectype
= vectype
;
9013 auto_vec
<tree
> dr_chain
;
9014 if (memory_access_type
== VMAT_STRIDED_SLP
)
9016 if (group_size
< const_nunits
)
9018 /* First check if vec_init optab supports construction from vector
9019 elts directly. Otherwise avoid emitting a constructor of
9020 vector elements by performing the loads using an integer type
9021 of the same size, constructing a vector of those and then
9022 re-interpreting it as the original vector type. This avoids a
9023 huge runtime penalty due to the general inability to perform
9024 store forwarding from smaller stores to a larger load. */
9027 = vector_vector_composition_type (vectype
,
9028 const_nunits
/ group_size
,
9030 if (vtype
!= NULL_TREE
)
9032 nloads
= const_nunits
/ group_size
;
9041 lnel
= const_nunits
;
9044 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9046 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9047 else if (nloads
== 1)
9052 /* For SLP permutation support we need to load the whole group,
9053 not only the number of vector stmts the permutation result
9057 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9059 unsigned int const_vf
= vf
.to_constant ();
9060 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9061 dr_chain
.create (ncopies
);
9064 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9066 unsigned int group_el
= 0;
9067 unsigned HOST_WIDE_INT
9068 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9069 for (j
= 0; j
< ncopies
; j
++)
9072 vec_alloc (v
, nloads
);
9073 stmt_vec_info new_stmt_info
= NULL
;
9074 for (i
= 0; i
< nloads
; i
++)
9076 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9077 group_el
* elsz
+ cst_offset
);
9078 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9079 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9081 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9083 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9085 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9086 gimple_assign_lhs (new_stmt
));
9090 || group_el
== group_size
)
9092 tree newoff
= copy_ssa_name (running_off
);
9093 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9094 running_off
, stride_step
);
9095 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9097 running_off
= newoff
;
9103 tree vec_inv
= build_constructor (lvectype
, v
);
9104 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9105 vec_inv
, lvectype
, gsi
);
9106 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9107 if (lvectype
!= vectype
)
9110 = gimple_build_assign (make_ssa_name (vectype
),
9112 build1 (VIEW_CONVERT_EXPR
,
9113 vectype
, new_temp
));
9115 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9123 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9125 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9130 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9132 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9133 prev_stmt_info
= new_stmt_info
;
9139 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9140 slp_node_instance
, false, &n_perms
);
9145 if (memory_access_type
== VMAT_GATHER_SCATTER
9146 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9147 grouped_load
= false;
9151 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9152 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9153 /* For SLP vectorization we directly vectorize a subchain
9154 without permutation. */
9155 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9156 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9157 /* For BB vectorization always use the first stmt to base
9158 the data ref pointer on. */
9160 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9162 /* Check if the chain of loads is already vectorized. */
9163 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9164 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9165 ??? But we can only do so if there is exactly one
9166 as we have no way to get at the rest. Leave the CSE
9168 ??? With the group load eventually participating
9169 in multiple different permutations (having multiple
9170 slp nodes which refer to the same group) the CSE
9171 is even wrong code. See PR56270. */
9174 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9177 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9180 /* VEC_NUM is the number of vect stmts to be created for this group. */
9183 grouped_load
= false;
9184 /* If an SLP permutation is from N elements to N elements,
9185 and if one vector holds a whole number of N, we can load
9186 the inputs to the permutation in the same way as an
9187 unpermuted sequence. In other cases we need to load the
9188 whole group, not only the number of vector stmts the
9189 permutation result fits in. */
9191 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
9192 || !multiple_p (nunits
, group_size
)))
9194 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9195 variable VF; see vect_transform_slp_perm_load. */
9196 unsigned int const_vf
= vf
.to_constant ();
9197 unsigned int const_nunits
= nunits
.to_constant ();
9198 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9199 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9203 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9205 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
9209 vec_num
= group_size
;
9211 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9215 first_stmt_info
= stmt_info
;
9216 first_dr_info
= dr_info
;
9217 group_size
= vec_num
= 1;
9219 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9222 /* Gather-scatter accesses perform only component accesses, alignment
9223 is irrelevant for them. */
9224 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9225 alignment_support_scheme
= dr_unaligned_supported
;
9227 alignment_support_scheme
9228 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
9230 gcc_assert (alignment_support_scheme
);
9231 vec_loop_masks
*loop_masks
9232 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9233 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9235 /* Targets with store-lane instructions must not require explicit
9236 realignment. vect_supportable_dr_alignment always returns either
9237 dr_aligned or dr_unaligned_supported for masked operations. */
9238 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9241 || alignment_support_scheme
== dr_aligned
9242 || alignment_support_scheme
== dr_unaligned_supported
);
9244 /* In case the vectorization factor (VF) is bigger than the number
9245 of elements that we can fit in a vectype (nunits), we have to generate
9246 more than one vector stmt - i.e - we need to "unroll" the
9247 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9248 from one copy of the vector stmt to the next, in the field
9249 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9250 stages to find the correct vector defs to be used when vectorizing
9251 stmts that use the defs of the current stmt. The example below
9252 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9253 need to create 4 vectorized stmts):
9255 before vectorization:
9256 RELATED_STMT VEC_STMT
9260 step 1: vectorize stmt S1:
9261 We first create the vector stmt VS1_0, and, as usual, record a
9262 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9263 Next, we create the vector stmt VS1_1, and record a pointer to
9264 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9265 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9267 RELATED_STMT VEC_STMT
9268 VS1_0: vx0 = memref0 VS1_1 -
9269 VS1_1: vx1 = memref1 VS1_2 -
9270 VS1_2: vx2 = memref2 VS1_3 -
9271 VS1_3: vx3 = memref3 - -
9272 S1: x = load - VS1_0
9275 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9276 information we recorded in RELATED_STMT field is used to vectorize
9279 /* In case of interleaving (non-unit grouped access):
9286 Vectorized loads are created in the order of memory accesses
9287 starting from the access of the first stmt of the chain:
9290 VS2: vx1 = &base + vec_size*1
9291 VS3: vx3 = &base + vec_size*2
9292 VS4: vx4 = &base + vec_size*3
9294 Then permutation statements are generated:
9296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9301 (the order of the data-refs in the output of vect_permute_load_chain
9302 corresponds to the order of scalar stmts in the interleaving chain - see
9303 the documentation of vect_permute_load_chain()).
9304 The generation of permutation stmts and recording them in
9305 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9307 In case of both multiple types and interleaving, the vector loads and
9308 permutation stmts above are created for every copy. The result vector
9309 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9310 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9312 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9313 on a target that supports unaligned accesses (dr_unaligned_supported)
9314 we generate the following code:
9318 p = p + indx * vectype_size;
9323 Otherwise, the data reference is potentially unaligned on a target that
9324 does not support unaligned accesses (dr_explicit_realign_optimized) -
9325 then generate the following code, in which the data in each iteration is
9326 obtained by two vector loads, one from the previous iteration, and one
9327 from the current iteration:
9329 msq_init = *(floor(p1))
9330 p2 = initial_addr + VS - 1;
9331 realignment_token = call target_builtin;
9334 p2 = p2 + indx * vectype_size
9336 vec_dest = realign_load (msq, lsq, realignment_token)
9341 /* If the misalignment remains the same throughout the execution of the
9342 loop, we can create the init_addr and permutation mask at the loop
9343 preheader. Otherwise, it needs to be created inside the loop.
9344 This can only occur when vectorizing memory accesses in the inner-loop
9345 nested within an outer-loop that is being vectorized. */
9347 if (nested_in_vect_loop
9348 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9349 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9351 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9352 compute_in_loop
= true;
9355 bool diff_first_stmt_info
9356 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9358 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9359 || alignment_support_scheme
== dr_explicit_realign
)
9360 && !compute_in_loop
)
9362 /* If we have different first_stmt_info, we can't set up realignment
9363 here, since we can't guarantee first_stmt_info DR has been
9364 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9365 distance from first_stmt_info DR instead as below. */
9366 if (!diff_first_stmt_info
)
9367 msq
= vect_setup_realignment (loop_vinfo
,
9368 first_stmt_info
, gsi
, &realignment_token
,
9369 alignment_support_scheme
, NULL_TREE
,
9371 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9373 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9374 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9376 gcc_assert (!first_stmt_info_for_drptr
);
9382 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9383 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9386 tree vec_offset
= NULL_TREE
;
9387 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9389 aggr_type
= NULL_TREE
;
9392 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9394 aggr_type
= elem_type
;
9395 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9396 &bump
, &vec_offset
);
9400 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9401 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9403 aggr_type
= vectype
;
9404 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9405 memory_access_type
);
9408 tree vec_mask
= NULL_TREE
;
9409 prev_stmt_info
= NULL
;
9410 poly_uint64 group_elt
= 0;
9411 for (j
= 0; j
< ncopies
; j
++)
9413 stmt_vec_info new_stmt_info
= NULL
;
9414 /* 1. Create the vector or array pointer update chain. */
9417 bool simd_lane_access_p
9418 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9419 if (simd_lane_access_p
9420 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9421 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9422 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9423 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9424 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9425 get_alias_set (TREE_TYPE (ref_type
)))
9426 && (alignment_support_scheme
== dr_aligned
9427 || alignment_support_scheme
== dr_unaligned_supported
))
9429 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9430 dataref_offset
= build_int_cst (ref_type
, 0);
9432 else if (diff_first_stmt_info
)
9435 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9436 aggr_type
, at_loop
, offset
, &dummy
,
9437 gsi
, &ptr_incr
, simd_lane_access_p
,
9439 /* Adjust the pointer by the difference to first_stmt. */
9440 data_reference_p ptrdr
9441 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9443 = fold_convert (sizetype
,
9444 size_binop (MINUS_EXPR
,
9445 DR_INIT (first_dr_info
->dr
),
9447 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9449 if (alignment_support_scheme
== dr_explicit_realign
)
9451 msq
= vect_setup_realignment (vinfo
,
9452 first_stmt_info_for_drptr
, gsi
,
9454 alignment_support_scheme
,
9455 dataref_ptr
, &at_loop
);
9456 gcc_assert (!compute_in_loop
);
9459 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9460 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
9461 &dataref_ptr
, &vec_offset
);
9464 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9466 offset
, &dummy
, gsi
, &ptr_incr
,
9473 auto_vec
<vec
<tree
> > vec_defs (1);
9474 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
9475 vec_mask
= vec_defs
[0][0];
9478 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
9485 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9487 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9488 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9490 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9493 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9496 if (grouped_load
|| slp_perm
)
9497 dr_chain
.create (vec_num
);
9499 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9503 vec_array
= create_vector_array (vectype
, vec_num
);
9505 tree final_mask
= NULL_TREE
;
9507 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9510 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9517 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9519 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9520 tree alias_ptr
= build_int_cst (ref_type
, align
);
9521 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9522 dataref_ptr
, alias_ptr
,
9528 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9529 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9530 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9532 gimple_call_set_lhs (call
, vec_array
);
9533 gimple_call_set_nothrow (call
, true);
9534 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
9537 /* Extract each vector into an SSA_NAME. */
9538 for (i
= 0; i
< vec_num
; i
++)
9540 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9542 dr_chain
.quick_push (new_temp
);
9545 /* Record the mapping between SSA_NAMEs and statements. */
9546 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9548 /* Record that VEC_ARRAY is now dead. */
9549 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9553 for (i
= 0; i
< vec_num
; i
++)
9555 tree final_mask
= NULL_TREE
;
9557 && memory_access_type
!= VMAT_INVARIANT
)
9558 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9560 vectype
, vec_num
* j
+ i
);
9562 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9566 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9567 gsi
, stmt_info
, bump
);
9569 /* 2. Create the vector-load in the loop. */
9570 gimple
*new_stmt
= NULL
;
9571 switch (alignment_support_scheme
)
9574 case dr_unaligned_supported
:
9576 unsigned int misalign
;
9577 unsigned HOST_WIDE_INT align
;
9579 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9581 tree zero
= build_zero_cst (vectype
);
9582 tree scale
= size_int (gs_info
.scale
);
9585 call
= gimple_build_call_internal
9586 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9587 vec_offset
, scale
, zero
, final_mask
);
9589 call
= gimple_build_call_internal
9590 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9591 vec_offset
, scale
, zero
);
9592 gimple_call_set_nothrow (call
, true);
9594 data_ref
= NULL_TREE
;
9599 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9600 if (alignment_support_scheme
== dr_aligned
)
9602 gcc_assert (aligned_access_p (first_dr_info
));
9605 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9607 align
= dr_alignment
9608 (vect_dr_behavior (vinfo
, first_dr_info
));
9612 misalign
= DR_MISALIGNMENT (first_dr_info
);
9613 if (dataref_offset
== NULL_TREE
9614 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9615 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9620 align
= least_bit_hwi (misalign
| align
);
9621 tree ptr
= build_int_cst (ref_type
, align
);
9623 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9626 gimple_call_set_nothrow (call
, true);
9628 data_ref
= NULL_TREE
;
9632 tree ltype
= vectype
;
9633 tree new_vtype
= NULL_TREE
;
9634 /* If there's no peeling for gaps but we have a gap
9635 with slp loads then load the lower half of the
9636 vector only. See get_group_load_store_type for
9637 when we apply this optimization. */
9640 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9641 && DR_GROUP_GAP (first_stmt_info
) != 0
9642 && known_eq (nunits
,
9644 - DR_GROUP_GAP (first_stmt_info
)) * 2)
9645 && known_eq (nunits
, group_size
))
9649 = vector_vector_composition_type (vectype
, 2,
9651 if (new_vtype
!= NULL_TREE
)
9655 = (dataref_offset
? dataref_offset
9656 : build_int_cst (ref_type
, 0));
9657 if (ltype
!= vectype
9658 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9660 unsigned HOST_WIDE_INT gap
9661 = DR_GROUP_GAP (first_stmt_info
);
9662 gap
*= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9663 tree gapcst
= build_int_cst (ref_type
, gap
);
9664 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9667 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9668 if (alignment_support_scheme
== dr_aligned
)
9670 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9671 TREE_TYPE (data_ref
)
9672 = build_aligned_type (TREE_TYPE (data_ref
),
9673 align
* BITS_PER_UNIT
);
9675 TREE_TYPE (data_ref
)
9676 = build_aligned_type (TREE_TYPE (data_ref
),
9677 TYPE_ALIGN (elem_type
));
9678 if (ltype
!= vectype
)
9680 vect_copy_ref_info (data_ref
,
9681 DR_REF (first_dr_info
->dr
));
9682 tree tem
= make_ssa_name (ltype
);
9683 new_stmt
= gimple_build_assign (tem
, data_ref
);
9684 vect_finish_stmt_generation (vinfo
, stmt_info
,
9687 vec
<constructor_elt
, va_gc
> *v
;
9689 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9691 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9692 build_zero_cst (ltype
));
9693 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9697 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9698 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9699 build_zero_cst (ltype
));
9701 gcc_assert (new_vtype
!= NULL_TREE
);
9702 if (new_vtype
== vectype
)
9703 new_stmt
= gimple_build_assign (
9704 vec_dest
, build_constructor (vectype
, v
));
9707 tree new_vname
= make_ssa_name (new_vtype
);
9708 new_stmt
= gimple_build_assign (
9709 new_vname
, build_constructor (new_vtype
, v
));
9710 vect_finish_stmt_generation (vinfo
, stmt_info
,
9712 new_stmt
= gimple_build_assign (
9713 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9720 case dr_explicit_realign
:
9724 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9726 if (compute_in_loop
)
9727 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9729 dr_explicit_realign
,
9732 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9733 ptr
= copy_ssa_name (dataref_ptr
);
9735 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9736 // For explicit realign the target alignment should be
9737 // known at compile time.
9738 unsigned HOST_WIDE_INT align
=
9739 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9740 new_stmt
= gimple_build_assign
9741 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9743 (TREE_TYPE (dataref_ptr
),
9744 -(HOST_WIDE_INT
) align
));
9745 vect_finish_stmt_generation (vinfo
, stmt_info
,
9748 = build2 (MEM_REF
, vectype
, ptr
,
9749 build_int_cst (ref_type
, 0));
9750 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9751 vec_dest
= vect_create_destination_var (scalar_dest
,
9753 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9754 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9755 gimple_assign_set_lhs (new_stmt
, new_temp
);
9756 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9757 vect_finish_stmt_generation (vinfo
, stmt_info
,
9761 bump
= size_binop (MULT_EXPR
, vs
,
9762 TYPE_SIZE_UNIT (elem_type
));
9763 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9764 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9766 new_stmt
= gimple_build_assign
9767 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9769 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9770 ptr
= copy_ssa_name (ptr
, new_stmt
);
9771 gimple_assign_set_lhs (new_stmt
, ptr
);
9772 vect_finish_stmt_generation (vinfo
, stmt_info
,
9775 = build2 (MEM_REF
, vectype
, ptr
,
9776 build_int_cst (ref_type
, 0));
9779 case dr_explicit_realign_optimized
:
9781 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9782 new_temp
= copy_ssa_name (dataref_ptr
);
9784 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9785 // We should only be doing this if we know the target
9786 // alignment at compile time.
9787 unsigned HOST_WIDE_INT align
=
9788 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9789 new_stmt
= gimple_build_assign
9790 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9791 build_int_cst (TREE_TYPE (dataref_ptr
),
9792 -(HOST_WIDE_INT
) align
));
9793 vect_finish_stmt_generation (vinfo
, stmt_info
,
9796 = build2 (MEM_REF
, vectype
, new_temp
,
9797 build_int_cst (ref_type
, 0));
9803 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9804 /* DATA_REF is null if we've already built the statement. */
9807 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9808 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9810 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9811 gimple_set_lhs (new_stmt
, new_temp
);
9813 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9816 /* 3. Handle explicit realignment if necessary/supported.
9818 vec_dest = realign_load (msq, lsq, realignment_token) */
9819 if (alignment_support_scheme
== dr_explicit_realign_optimized
9820 || alignment_support_scheme
== dr_explicit_realign
)
9822 lsq
= gimple_assign_lhs (new_stmt
);
9823 if (!realignment_token
)
9824 realignment_token
= dataref_ptr
;
9825 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9826 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9827 msq
, lsq
, realignment_token
);
9828 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9829 gimple_assign_set_lhs (new_stmt
, new_temp
);
9831 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9834 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9837 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9838 add_phi_arg (phi
, lsq
,
9839 loop_latch_edge (containing_loop
),
9845 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9847 tree perm_mask
= perm_mask_for_reverse (vectype
);
9848 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9849 perm_mask
, stmt_info
, gsi
);
9850 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9853 /* Collect vector loads and later create their permutation in
9854 vect_transform_grouped_load (). */
9855 if (grouped_load
|| slp_perm
)
9856 dr_chain
.quick_push (new_temp
);
9858 /* Store vector loads in the corresponding SLP_NODE. */
9859 if (slp
&& !slp_perm
)
9860 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9862 /* With SLP permutation we load the gaps as well, without
9863 we need to skip the gaps after we manage to fully load
9864 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9865 group_elt
+= nunits
;
9866 if (maybe_ne (group_gap_adj
, 0U)
9868 && known_eq (group_elt
, group_size
- group_gap_adj
))
9870 poly_wide_int bump_val
9871 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9873 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9874 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9875 gsi
, stmt_info
, bump
);
9879 /* Bump the vector pointer to account for a gap or for excess
9880 elements loaded for a permuted SLP load. */
9881 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9883 poly_wide_int bump_val
9884 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9886 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9887 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9892 if (slp
&& !slp_perm
)
9898 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9899 slp_node_instance
, false,
9902 dr_chain
.release ();
9910 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9911 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9913 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9918 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9920 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9921 prev_stmt_info
= new_stmt_info
;
9924 dr_chain
.release ();
9930 /* Function vect_is_simple_cond.
9933 LOOP - the loop that is being vectorized.
9934 COND - Condition that is checked for simple use.
9937 *COMP_VECTYPE - the vector type for the comparison.
9938 *DTS - The def types for the arguments of the comparison
9940 Returns whether a COND can be vectorized. Checks whether
9941 condition operands are supportable using vec_is_simple_use. */
9944 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, slp_tree slp_node
,
9945 tree
*comp_vectype
, enum vect_def_type
*dts
,
9949 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9952 if (TREE_CODE (cond
) == SSA_NAME
9953 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9955 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9957 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9962 if (!COMPARISON_CLASS_P (cond
))
9965 lhs
= TREE_OPERAND (cond
, 0);
9966 rhs
= TREE_OPERAND (cond
, 1);
9968 if (TREE_CODE (lhs
) == SSA_NAME
)
9970 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
9973 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9974 || TREE_CODE (lhs
) == FIXED_CST
)
9975 dts
[0] = vect_constant_def
;
9979 if (TREE_CODE (rhs
) == SSA_NAME
)
9981 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
9984 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9985 || TREE_CODE (rhs
) == FIXED_CST
)
9986 dts
[1] = vect_constant_def
;
9990 if (vectype1
&& vectype2
9991 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9992 TYPE_VECTOR_SUBPARTS (vectype2
)))
9995 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9996 /* Invariant comparison. */
9997 if (! *comp_vectype
)
9999 tree scalar_type
= TREE_TYPE (lhs
);
10000 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10001 *comp_vectype
= truth_type_for (vectype
);
10004 /* If we can widen the comparison to match vectype do so. */
10005 if (INTEGRAL_TYPE_P (scalar_type
)
10007 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10008 TYPE_SIZE (TREE_TYPE (vectype
))))
10009 scalar_type
= build_nonstandard_integer_type
10010 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
10011 TYPE_UNSIGNED (scalar_type
));
10012 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10020 /* vectorizable_condition.
10022 Check if STMT_INFO is conditional modify expression that can be vectorized.
10023 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10024 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10027 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10029 Return true if STMT_INFO is vectorizable in this way. */
10032 vectorizable_condition (vec_info
*vinfo
,
10033 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10034 stmt_vec_info
*vec_stmt
,
10035 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10037 tree scalar_dest
= NULL_TREE
;
10038 tree vec_dest
= NULL_TREE
;
10039 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10040 tree then_clause
, else_clause
;
10041 tree comp_vectype
= NULL_TREE
;
10042 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10043 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10046 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10047 enum vect_def_type dts
[4]
10048 = {vect_unknown_def_type
, vect_unknown_def_type
,
10049 vect_unknown_def_type
, vect_unknown_def_type
};
10053 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10054 stmt_vec_info prev_stmt_info
= NULL
;
10056 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10057 vec
<tree
> vec_oprnds0
= vNULL
;
10058 vec
<tree
> vec_oprnds1
= vNULL
;
10059 vec
<tree
> vec_oprnds2
= vNULL
;
10060 vec
<tree
> vec_oprnds3
= vNULL
;
10062 bool masked
= false;
10064 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10067 /* Is vectorizable conditional operation? */
10068 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10072 code
= gimple_assign_rhs_code (stmt
);
10073 if (code
!= COND_EXPR
)
10076 stmt_vec_info reduc_info
= NULL
;
10077 int reduc_index
= -1;
10078 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10080 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10083 if (STMT_SLP_TYPE (stmt_info
))
10085 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10086 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10087 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10088 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10089 || reduc_index
!= -1);
10093 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10096 /* FORNOW: only supported as part of a reduction. */
10097 if (STMT_VINFO_LIVE_P (stmt_info
))
10099 if (dump_enabled_p ())
10100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10101 "value used after loop.\n");
10106 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10107 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10112 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10116 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10120 gcc_assert (ncopies
>= 1);
10121 if (for_reduction
&& ncopies
> 1)
10122 return false; /* FORNOW */
10124 cond_expr
= gimple_assign_rhs1 (stmt
);
10125 then_clause
= gimple_assign_rhs2 (stmt
);
10126 else_clause
= gimple_assign_rhs3 (stmt
);
10128 if (!vect_is_simple_cond (cond_expr
, vinfo
, slp_node
,
10129 &comp_vectype
, &dts
[0], vectype
)
10133 if (!vect_is_simple_use (then_clause
, vinfo
, &dts
[2], &vectype1
))
10135 if (!vect_is_simple_use (else_clause
, vinfo
, &dts
[3], &vectype2
))
10138 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10141 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10144 masked
= !COMPARISON_CLASS_P (cond_expr
);
10145 vec_cmp_type
= truth_type_for (comp_vectype
);
10147 if (vec_cmp_type
== NULL_TREE
)
10150 cond_code
= TREE_CODE (cond_expr
);
10153 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10154 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10157 /* For conditional reductions, the "then" value needs to be the candidate
10158 value calculated by this iteration while the "else" value needs to be
10159 the result carried over from previous iterations. If the COND_EXPR
10160 is the other way around, we need to swap it. */
10161 bool must_invert_cmp_result
= false;
10162 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10165 must_invert_cmp_result
= true;
10168 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10169 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10170 if (new_code
== ERROR_MARK
)
10171 must_invert_cmp_result
= true;
10174 cond_code
= new_code
;
10175 /* Make sure we don't accidentally use the old condition. */
10176 cond_expr
= NULL_TREE
;
10179 std::swap (then_clause
, else_clause
);
10182 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10184 /* Boolean values may have another representation in vectors
10185 and therefore we prefer bit operations over comparison for
10186 them (which also works for scalar masks). We store opcodes
10187 to use in bitop1 and bitop2. Statement is vectorized as
10188 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10189 depending on bitop1 and bitop2 arity. */
10193 bitop1
= BIT_NOT_EXPR
;
10194 bitop2
= BIT_AND_EXPR
;
10197 bitop1
= BIT_NOT_EXPR
;
10198 bitop2
= BIT_IOR_EXPR
;
10201 bitop1
= BIT_NOT_EXPR
;
10202 bitop2
= BIT_AND_EXPR
;
10203 std::swap (cond_expr0
, cond_expr1
);
10206 bitop1
= BIT_NOT_EXPR
;
10207 bitop2
= BIT_IOR_EXPR
;
10208 std::swap (cond_expr0
, cond_expr1
);
10211 bitop1
= BIT_XOR_EXPR
;
10214 bitop1
= BIT_XOR_EXPR
;
10215 bitop2
= BIT_NOT_EXPR
;
10220 cond_code
= SSA_NAME
;
10223 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10224 && reduction_type
== EXTRACT_LAST_REDUCTION
10225 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10227 if (dump_enabled_p ())
10228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10229 "reduction comparison operation not supported.\n");
10235 if (bitop1
!= NOP_EXPR
)
10237 machine_mode mode
= TYPE_MODE (comp_vectype
);
10240 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10241 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10244 if (bitop2
!= NOP_EXPR
)
10246 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10248 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10254 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
10255 && reduction_type
== EXTRACT_LAST_REDUCTION
)
10256 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10257 ncopies
* vec_num
, vectype
, NULL
);
10259 vect_cost_for_stmt kind
= vector_stmt
;
10260 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10261 /* Count one reduction-like operation per vector. */
10262 kind
= vec_to_scalar
;
10263 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10266 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10267 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10276 vec_oprnds0
.create (1);
10277 vec_oprnds1
.create (1);
10278 vec_oprnds2
.create (1);
10279 vec_oprnds3
.create (1);
10283 scalar_dest
= gimple_assign_lhs (stmt
);
10284 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10285 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10287 /* Handle cond expr. */
10288 for (j
= 0; j
< ncopies
; j
++)
10290 bool swap_cond_operands
= false;
10292 /* See whether another part of the vectorized code applies a loop
10293 mask to the condition, or to its inverse. */
10295 vec_loop_masks
*masks
= NULL
;
10296 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10298 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10299 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10302 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10303 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10304 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10307 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10308 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10309 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10311 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10312 cond_code
= cond
.code
;
10313 swap_cond_operands
= true;
10319 stmt_vec_info new_stmt_info
= NULL
;
10324 auto_vec
<vec
<tree
>, 4> vec_defs
;
10325 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10326 vec_oprnds3
= vec_defs
.pop ();
10327 vec_oprnds2
= vec_defs
.pop ();
10329 vec_oprnds1
= vec_defs
.pop ();
10330 vec_oprnds0
= vec_defs
.pop ();
10337 = vect_get_vec_def_for_operand (vinfo
, cond_expr
, stmt_info
,
10343 = vect_get_vec_def_for_operand (vinfo
, cond_expr0
,
10344 stmt_info
, comp_vectype
);
10346 = vect_get_vec_def_for_operand (vinfo
, cond_expr1
,
10347 stmt_info
, comp_vectype
);
10349 vec_then_clause
= vect_get_vec_def_for_operand (vinfo
,
10352 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10353 vec_else_clause
= vect_get_vec_def_for_operand (vinfo
,
10361 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10364 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10366 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10367 vec_oprnds2
.pop ());
10368 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10369 vec_oprnds3
.pop ());
10374 vec_oprnds0
.quick_push (vec_cond_lhs
);
10376 vec_oprnds1
.quick_push (vec_cond_rhs
);
10377 vec_oprnds2
.quick_push (vec_then_clause
);
10378 vec_oprnds3
.quick_push (vec_else_clause
);
10381 /* Arguments are ready. Create the new vector stmt. */
10382 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10384 vec_then_clause
= vec_oprnds2
[i
];
10385 vec_else_clause
= vec_oprnds3
[i
];
10387 if (swap_cond_operands
)
10388 std::swap (vec_then_clause
, vec_else_clause
);
10391 vec_compare
= vec_cond_lhs
;
10394 vec_cond_rhs
= vec_oprnds1
[i
];
10395 if (bitop1
== NOP_EXPR
)
10396 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10397 vec_cond_lhs
, vec_cond_rhs
);
10400 new_temp
= make_ssa_name (vec_cmp_type
);
10402 if (bitop1
== BIT_NOT_EXPR
)
10403 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10407 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10409 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10410 if (bitop2
== NOP_EXPR
)
10411 vec_compare
= new_temp
;
10412 else if (bitop2
== BIT_NOT_EXPR
)
10414 /* Instead of doing ~x ? y : z do x ? z : y. */
10415 vec_compare
= new_temp
;
10416 std::swap (vec_then_clause
, vec_else_clause
);
10420 vec_compare
= make_ssa_name (vec_cmp_type
);
10422 = gimple_build_assign (vec_compare
, bitop2
,
10423 vec_cond_lhs
, new_temp
);
10424 vect_finish_stmt_generation (vinfo
, stmt_info
,
10430 /* If we decided to apply a loop mask to the result of the vector
10431 comparison, AND the comparison with the mask now. Later passes
10432 should then be able to reuse the AND results between mulitple
10436 for (int i = 0; i < 100; ++i)
10437 x[i] = y[i] ? z[i] : 10;
10439 results in following optimized GIMPLE:
10441 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10442 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10443 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10444 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10445 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10446 vect_iftmp.11_47, { 10, ... }>;
10448 instead of using a masked and unmasked forms of
10449 vec != { 0, ... } (masked in the MASK_LOAD,
10450 unmasked in the VEC_COND_EXPR). */
10452 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10453 in cases where that's necessary. */
10455 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10457 if (!is_gimple_val (vec_compare
))
10459 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10460 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10462 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10463 vec_compare
= vec_compare_name
;
10466 if (must_invert_cmp_result
)
10468 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10469 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10472 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10473 vec_compare
= vec_compare_name
;
10478 unsigned vec_num
= vec_oprnds0
.length ();
10480 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10481 vectype
, vec_num
* j
+ i
);
10482 tree tmp2
= make_ssa_name (vec_cmp_type
);
10484 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10486 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10487 vec_compare
= tmp2
;
10491 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10493 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10494 tree lhs
= gimple_get_lhs (old_stmt
);
10495 gcall
*new_stmt
= gimple_build_call_internal
10496 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10498 gimple_call_set_lhs (new_stmt
, lhs
);
10499 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10500 if (old_stmt
== gsi_stmt (*gsi
))
10501 new_stmt_info
= vect_finish_replace_stmt (vinfo
,
10502 stmt_info
, new_stmt
);
10505 /* In this case we're moving the definition to later in the
10506 block. That doesn't matter because the only uses of the
10507 lhs are in phi statements. */
10508 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10509 gsi_remove (&old_gsi
, true);
10511 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10517 new_temp
= make_ssa_name (vec_dest
);
10519 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10520 vec_then_clause
, vec_else_clause
);
10522 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10525 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10532 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10534 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10536 prev_stmt_info
= new_stmt_info
;
10539 vec_oprnds0
.release ();
10540 vec_oprnds1
.release ();
10541 vec_oprnds2
.release ();
10542 vec_oprnds3
.release ();
10547 /* vectorizable_comparison.
10549 Check if STMT_INFO is comparison expression that can be vectorized.
10550 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10551 comparison, put it in VEC_STMT, and insert it at GSI.
10553 Return true if STMT_INFO is vectorizable in this way. */
10556 vectorizable_comparison (vec_info
*vinfo
,
10557 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10558 stmt_vec_info
*vec_stmt
,
10559 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10561 tree lhs
, rhs1
, rhs2
;
10562 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10563 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10564 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10566 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10567 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10569 poly_uint64 nunits
;
10571 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10572 stmt_vec_info prev_stmt_info
= NULL
;
10574 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10575 vec
<tree
> vec_oprnds0
= vNULL
;
10576 vec
<tree
> vec_oprnds1
= vNULL
;
10580 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10583 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10586 mask_type
= vectype
;
10587 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10592 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10594 gcc_assert (ncopies
>= 1);
10595 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10598 if (STMT_VINFO_LIVE_P (stmt_info
))
10600 if (dump_enabled_p ())
10601 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10602 "value used after loop.\n");
10606 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10610 code
= gimple_assign_rhs_code (stmt
);
10612 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10615 rhs1
= gimple_assign_rhs1 (stmt
);
10616 rhs2
= gimple_assign_rhs2 (stmt
);
10618 if (!vect_is_simple_use (rhs1
, vinfo
, &dts
[0], &vectype1
))
10621 if (!vect_is_simple_use (rhs2
, vinfo
, &dts
[1], &vectype2
))
10624 if (vectype1
&& vectype2
10625 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10626 TYPE_VECTOR_SUBPARTS (vectype2
)))
10629 vectype
= vectype1
? vectype1
: vectype2
;
10631 /* Invariant comparison. */
10634 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10635 vectype
= mask_type
;
10637 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10639 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10642 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10645 /* Can't compare mask and non-mask types. */
10646 if (vectype1
&& vectype2
10647 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10650 /* Boolean values may have another representation in vectors
10651 and therefore we prefer bit operations over comparison for
10652 them (which also works for scalar masks). We store opcodes
10653 to use in bitop1 and bitop2. Statement is vectorized as
10654 BITOP2 (rhs1 BITOP1 rhs2) or
10655 rhs1 BITOP2 (BITOP1 rhs2)
10656 depending on bitop1 and bitop2 arity. */
10657 bool swap_p
= false;
10658 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10660 if (code
== GT_EXPR
)
10662 bitop1
= BIT_NOT_EXPR
;
10663 bitop2
= BIT_AND_EXPR
;
10665 else if (code
== GE_EXPR
)
10667 bitop1
= BIT_NOT_EXPR
;
10668 bitop2
= BIT_IOR_EXPR
;
10670 else if (code
== LT_EXPR
)
10672 bitop1
= BIT_NOT_EXPR
;
10673 bitop2
= BIT_AND_EXPR
;
10676 else if (code
== LE_EXPR
)
10678 bitop1
= BIT_NOT_EXPR
;
10679 bitop2
= BIT_IOR_EXPR
;
10684 bitop1
= BIT_XOR_EXPR
;
10685 if (code
== EQ_EXPR
)
10686 bitop2
= BIT_NOT_EXPR
;
10692 if (bitop1
== NOP_EXPR
)
10694 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10699 machine_mode mode
= TYPE_MODE (vectype
);
10702 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10703 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10706 if (bitop2
!= NOP_EXPR
)
10708 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10709 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10714 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10715 vect_model_simple_cost (vinfo
, stmt_info
,
10716 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10717 dts
, ndts
, slp_node
, cost_vec
);
10724 vec_oprnds0
.create (1);
10725 vec_oprnds1
.create (1);
10729 lhs
= gimple_assign_lhs (stmt
);
10730 mask
= vect_create_destination_var (lhs
, mask_type
);
10732 /* Handle cmp expr. */
10733 for (j
= 0; j
< ncopies
; j
++)
10735 stmt_vec_info new_stmt_info
= NULL
;
10740 auto_vec
<vec
<tree
>, 2> vec_defs
;
10741 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10742 vec_oprnds1
= vec_defs
.pop ();
10743 vec_oprnds0
= vec_defs
.pop ();
10745 std::swap (vec_oprnds0
, vec_oprnds1
);
10749 vec_rhs1
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
,
10751 vec_rhs2
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
,
10757 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10758 vec_oprnds0
.pop ());
10759 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10760 vec_oprnds1
.pop ());
10765 if (swap_p
&& j
== 0)
10766 std::swap (vec_rhs1
, vec_rhs2
);
10767 vec_oprnds0
.quick_push (vec_rhs1
);
10768 vec_oprnds1
.quick_push (vec_rhs2
);
10771 /* Arguments are ready. Create the new vector stmt. */
10772 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10774 vec_rhs2
= vec_oprnds1
[i
];
10776 new_temp
= make_ssa_name (mask
);
10777 if (bitop1
== NOP_EXPR
)
10779 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10780 vec_rhs1
, vec_rhs2
);
10782 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10787 if (bitop1
== BIT_NOT_EXPR
)
10788 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10790 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10793 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10794 if (bitop2
!= NOP_EXPR
)
10796 tree res
= make_ssa_name (mask
);
10797 if (bitop2
== BIT_NOT_EXPR
)
10798 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10800 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10803 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10808 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10815 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10817 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10819 prev_stmt_info
= new_stmt_info
;
10822 vec_oprnds0
.release ();
10823 vec_oprnds1
.release ();
10828 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10829 can handle all live statements in the node. Otherwise return true
10830 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10831 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10834 can_vectorize_live_stmts (loop_vec_info loop_vinfo
,
10835 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10836 slp_tree slp_node
, slp_instance slp_node_instance
,
10838 stmt_vector_for_cost
*cost_vec
)
10842 stmt_vec_info slp_stmt_info
;
10844 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10846 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10847 && !vectorizable_live_operation (loop_vinfo
,
10848 slp_stmt_info
, gsi
, slp_node
,
10849 slp_node_instance
, i
,
10850 vec_stmt_p
, cost_vec
))
10854 else if (STMT_VINFO_LIVE_P (stmt_info
)
10855 && !vectorizable_live_operation (loop_vinfo
, stmt_info
, gsi
,
10856 slp_node
, slp_node_instance
, -1,
10857 vec_stmt_p
, cost_vec
))
10863 /* Make sure the statement is vectorizable. */
10866 vect_analyze_stmt (vec_info
*vinfo
,
10867 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10868 slp_tree node
, slp_instance node_instance
,
10869 stmt_vector_for_cost
*cost_vec
)
10871 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10872 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10874 gimple_seq pattern_def_seq
;
10876 if (dump_enabled_p ())
10877 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10880 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10881 return opt_result::failure_at (stmt_info
->stmt
,
10883 " stmt has volatile operands: %G\n",
10886 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10888 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10890 gimple_stmt_iterator si
;
10892 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10894 stmt_vec_info pattern_def_stmt_info
10895 = vinfo
->lookup_stmt (gsi_stmt (si
));
10896 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10897 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10899 /* Analyze def stmt of STMT if it's a pattern stmt. */
10900 if (dump_enabled_p ())
10901 dump_printf_loc (MSG_NOTE
, vect_location
,
10902 "==> examining pattern def statement: %G",
10903 pattern_def_stmt_info
->stmt
);
10906 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10907 need_to_vectorize
, node
, node_instance
,
10915 /* Skip stmts that do not need to be vectorized. In loops this is expected
10917 - the COND_EXPR which is the loop exit condition
10918 - any LABEL_EXPRs in the loop
10919 - computations that are used only for array indexing or loop control.
10920 In basic blocks we only analyze statements that are a part of some SLP
10921 instance, therefore, all the statements are relevant.
10923 Pattern statement needs to be analyzed instead of the original statement
10924 if the original statement is not relevant. Otherwise, we analyze both
10925 statements. In basic blocks we are called from some SLP instance
10926 traversal, don't analyze pattern stmts instead, the pattern stmts
10927 already will be part of SLP instance. */
10929 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10930 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10931 && !STMT_VINFO_LIVE_P (stmt_info
))
10933 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10934 && pattern_stmt_info
10935 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10936 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10938 /* Analyze PATTERN_STMT instead of the original stmt. */
10939 stmt_info
= pattern_stmt_info
;
10940 if (dump_enabled_p ())
10941 dump_printf_loc (MSG_NOTE
, vect_location
,
10942 "==> examining pattern statement: %G",
10947 if (dump_enabled_p ())
10948 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10950 return opt_result::success ();
10953 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10955 && pattern_stmt_info
10956 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10957 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10959 /* Analyze PATTERN_STMT too. */
10960 if (dump_enabled_p ())
10961 dump_printf_loc (MSG_NOTE
, vect_location
,
10962 "==> examining pattern statement: %G",
10963 pattern_stmt_info
->stmt
);
10966 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10967 node_instance
, cost_vec
);
10972 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10974 case vect_internal_def
:
10977 case vect_reduction_def
:
10978 case vect_nested_cycle
:
10979 gcc_assert (!bb_vinfo
10980 && (relevance
== vect_used_in_outer
10981 || relevance
== vect_used_in_outer_by_reduction
10982 || relevance
== vect_used_by_reduction
10983 || relevance
== vect_unused_in_scope
10984 || relevance
== vect_used_only_live
));
10987 case vect_induction_def
:
10988 gcc_assert (!bb_vinfo
);
10991 case vect_constant_def
:
10992 case vect_external_def
:
10993 case vect_unknown_def_type
:
10995 gcc_unreachable ();
10998 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11000 tree type
= gimple_expr_type (stmt_info
->stmt
);
11001 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
11002 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11003 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11004 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11005 *need_to_vectorize
= true;
11008 if (PURE_SLP_STMT (stmt_info
) && !node
)
11010 if (dump_enabled_p ())
11011 dump_printf_loc (MSG_NOTE
, vect_location
,
11012 "handled only by SLP analysis\n");
11013 return opt_result::success ();
11018 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11019 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11020 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11021 -mveclibabi= takes preference over library functions with
11022 the simd attribute. */
11023 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11024 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11026 || vectorizable_conversion (vinfo
, stmt_info
,
11027 NULL
, NULL
, node
, cost_vec
)
11028 || vectorizable_operation (vinfo
, stmt_info
,
11029 NULL
, NULL
, node
, cost_vec
)
11030 || vectorizable_assignment (vinfo
, stmt_info
,
11031 NULL
, NULL
, node
, cost_vec
)
11032 || vectorizable_load (vinfo
, stmt_info
,
11033 NULL
, NULL
, node
, node_instance
, cost_vec
)
11034 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11035 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11036 node
, node_instance
, cost_vec
)
11037 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11038 NULL
, NULL
, node
, cost_vec
)
11039 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11040 || vectorizable_condition (vinfo
, stmt_info
,
11041 NULL
, NULL
, node
, cost_vec
)
11042 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11044 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11045 stmt_info
, NULL
, node
));
11049 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11050 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11051 NULL
, NULL
, node
, cost_vec
)
11052 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11054 || vectorizable_shift (vinfo
, stmt_info
,
11055 NULL
, NULL
, node
, cost_vec
)
11056 || vectorizable_operation (vinfo
, stmt_info
,
11057 NULL
, NULL
, node
, cost_vec
)
11058 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11060 || vectorizable_load (vinfo
, stmt_info
,
11061 NULL
, NULL
, node
, node_instance
, cost_vec
)
11062 || vectorizable_store (vinfo
, stmt_info
,
11063 NULL
, NULL
, node
, cost_vec
)
11064 || vectorizable_condition (vinfo
, stmt_info
,
11065 NULL
, NULL
, node
, cost_vec
)
11066 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11071 return opt_result::failure_at (stmt_info
->stmt
,
11073 " relevant stmt not supported: %G",
11076 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11077 need extra handling, except for vectorizable reductions. */
11079 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11080 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11081 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11082 stmt_info
, NULL
, node
, node_instance
,
11084 return opt_result::failure_at (stmt_info
->stmt
,
11086 " live stmt not supported: %G",
11089 return opt_result::success ();
11093 /* Function vect_transform_stmt.
11095 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11098 vect_transform_stmt (vec_info
*vinfo
,
11099 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11100 slp_tree slp_node
, slp_instance slp_node_instance
)
11102 bool is_store
= false;
11103 stmt_vec_info vec_stmt
= NULL
;
11106 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11107 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
11109 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11110 bool nested_p
= (loop_vinfo
11111 && nested_in_vect_loop_p
11112 (LOOP_VINFO_LOOP (loop_vinfo
), stmt_info
));
11114 gimple
*stmt
= stmt_info
->stmt
;
11115 switch (STMT_VINFO_TYPE (stmt_info
))
11117 case type_demotion_vec_info_type
:
11118 case type_promotion_vec_info_type
:
11119 case type_conversion_vec_info_type
:
11120 done
= vectorizable_conversion (vinfo
, stmt_info
,
11121 gsi
, &vec_stmt
, slp_node
, NULL
);
11125 case induc_vec_info_type
:
11126 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11127 stmt_info
, gsi
, &vec_stmt
, slp_node
,
11132 case shift_vec_info_type
:
11133 done
= vectorizable_shift (vinfo
, stmt_info
,
11134 gsi
, &vec_stmt
, slp_node
, NULL
);
11138 case op_vec_info_type
:
11139 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11144 case assignment_vec_info_type
:
11145 done
= vectorizable_assignment (vinfo
, stmt_info
,
11146 gsi
, &vec_stmt
, slp_node
, NULL
);
11150 case load_vec_info_type
:
11151 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11152 slp_node_instance
, NULL
);
11156 case store_vec_info_type
:
11157 done
= vectorizable_store (vinfo
, stmt_info
,
11158 gsi
, &vec_stmt
, slp_node
, NULL
);
11160 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11162 /* In case of interleaving, the whole chain is vectorized when the
11163 last store in the chain is reached. Store stmts before the last
11164 one are skipped, and there vec_stmt_info shouldn't be freed
11166 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11167 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11174 case condition_vec_info_type
:
11175 done
= vectorizable_condition (vinfo
, stmt_info
,
11176 gsi
, &vec_stmt
, slp_node
, NULL
);
11180 case comparison_vec_info_type
:
11181 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11186 case call_vec_info_type
:
11187 done
= vectorizable_call (vinfo
, stmt_info
,
11188 gsi
, &vec_stmt
, slp_node
, NULL
);
11189 stmt
= gsi_stmt (*gsi
);
11192 case call_simd_clone_vec_info_type
:
11193 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11195 stmt
= gsi_stmt (*gsi
);
11198 case reduc_vec_info_type
:
11199 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11200 gsi
, &vec_stmt
, slp_node
);
11204 case cycle_phi_info_type
:
11205 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11206 &vec_stmt
, slp_node
, slp_node_instance
);
11210 case lc_phi_info_type
:
11211 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11212 stmt_info
, &vec_stmt
, slp_node
);
11217 if (!STMT_VINFO_LIVE_P (stmt_info
))
11219 if (dump_enabled_p ())
11220 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11221 "stmt not supported.\n");
11222 gcc_unreachable ();
11227 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11228 This would break hybrid SLP vectorization. */
11230 gcc_assert (!vec_stmt
11231 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
11233 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11234 is being vectorized, but outside the immediately enclosing loop. */
11237 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11238 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
11239 || STMT_VINFO_RELEVANT (stmt_info
) ==
11240 vect_used_in_outer_by_reduction
))
11242 class loop
*innerloop
= LOOP_VINFO_LOOP (loop_vinfo
)->inner
;
11243 imm_use_iterator imm_iter
;
11244 use_operand_p use_p
;
11247 if (dump_enabled_p ())
11248 dump_printf_loc (MSG_NOTE
, vect_location
,
11249 "Record the vdef for outer-loop vectorization.\n");
11251 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11252 (to be used when vectorizing outer-loop stmts that use the DEF of
11254 if (gimple_code (stmt
) == GIMPLE_PHI
)
11255 scalar_dest
= PHI_RESULT (stmt
);
11257 scalar_dest
= gimple_get_lhs (stmt
);
11259 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11260 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11262 stmt_vec_info exit_phi_info
11263 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11264 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11269 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11271 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11274 /* If this stmt defines a value used on a backedge, update the
11275 vectorized PHIs. */
11276 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11277 stmt_vec_info reduc_info
;
11278 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11279 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11280 && (reduc_info
= info_for_reduction (vinfo
, orig_stmt_info
))
11281 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11282 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11287 && (phi
= dyn_cast
<gphi
*>
11288 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11289 && dominated_by_p (CDI_DOMINATORS
,
11290 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11291 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11292 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11293 == gimple_get_lhs (orig_stmt_info
->stmt
)))
11295 stmt_vec_info phi_info
11296 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11297 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
11300 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11301 gimple_get_lhs (vec_stmt
->stmt
), e
,
11302 gimple_phi_arg_location (phi
, e
->dest_idx
));
11303 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11304 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11307 gcc_assert (!vec_stmt
);
11310 && slp_node
!= slp_node_instance
->reduc_phis
)
11312 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11313 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11314 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11315 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11316 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11317 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11318 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11319 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]->stmt
),
11320 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11324 /* Handle stmts whose DEF is used outside the loop-nest that is
11325 being vectorized. */
11326 if (is_a
<loop_vec_info
> (vinfo
))
11327 done
= can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11328 stmt_info
, gsi
, slp_node
,
11329 slp_node_instance
, true, NULL
);
11336 /* Remove a group of stores (for SLP or interleaving), free their
11340 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11342 stmt_vec_info next_stmt_info
= first_stmt_info
;
11344 while (next_stmt_info
)
11346 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11347 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11348 /* Free the attached stmt_vec_info and remove the stmt. */
11349 vinfo
->remove_stmt (next_stmt_info
);
11350 next_stmt_info
= tmp
;
11354 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11355 elements of type SCALAR_TYPE, or null if the target doesn't support
11358 If NUNITS is zero, return a vector type that contains elements of
11359 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11361 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11362 for this vectorization region and want to "autodetect" the best choice.
11363 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11364 and we want the new type to be interoperable with it. PREVAILING_MODE
11365 in this case can be a scalar integer mode or a vector mode; when it
11366 is a vector mode, the function acts like a tree-level version of
11367 related_vector_mode. */
11370 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11371 tree scalar_type
, poly_uint64 nunits
)
11373 tree orig_scalar_type
= scalar_type
;
11374 scalar_mode inner_mode
;
11375 machine_mode simd_mode
;
11378 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11379 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11382 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11384 /* For vector types of elements whose mode precision doesn't
11385 match their types precision we use a element type of mode
11386 precision. The vectorization routines will have to make sure
11387 they support the proper result truncation/extension.
11388 We also make sure to build vector types with INTEGER_TYPE
11389 component type only. */
11390 if (INTEGRAL_TYPE_P (scalar_type
)
11391 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11392 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11393 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11394 TYPE_UNSIGNED (scalar_type
));
11396 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11397 When the component mode passes the above test simply use a type
11398 corresponding to that mode. The theory is that any use that
11399 would cause problems with this will disable vectorization anyway. */
11400 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11401 && !INTEGRAL_TYPE_P (scalar_type
))
11402 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11404 /* We can't build a vector type of elements with alignment bigger than
11406 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11407 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11408 TYPE_UNSIGNED (scalar_type
));
11410 /* If we fell back to using the mode, fail if there was
11411 no scalar type for it. */
11412 if (scalar_type
== NULL_TREE
)
11415 /* If no prevailing mode was supplied, use the mode the target prefers.
11416 Otherwise lookup a vector mode based on the prevailing mode. */
11417 if (prevailing_mode
== VOIDmode
)
11419 gcc_assert (known_eq (nunits
, 0U));
11420 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11421 if (SCALAR_INT_MODE_P (simd_mode
))
11423 /* Traditional behavior is not to take the integer mode
11424 literally, but simply to use it as a way of determining
11425 the vector size. It is up to mode_for_vector to decide
11426 what the TYPE_MODE should be.
11428 Note that nunits == 1 is allowed in order to support single
11429 element vector types. */
11430 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11431 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11435 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11436 || !related_vector_mode (prevailing_mode
,
11437 inner_mode
, nunits
).exists (&simd_mode
))
11439 /* Fall back to using mode_for_vector, mostly in the hope of being
11440 able to use an integer mode. */
11441 if (known_eq (nunits
, 0U)
11442 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11445 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11449 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11451 /* In cases where the mode was chosen by mode_for_vector, check that
11452 the target actually supports the chosen mode, or that it at least
11453 allows the vector mode to be replaced by a like-sized integer. */
11454 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11455 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11458 /* Re-attach the address-space qualifier if we canonicalized the scalar
11460 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11461 return build_qualified_type
11462 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11467 /* Function get_vectype_for_scalar_type.
11469 Returns the vector type corresponding to SCALAR_TYPE as supported
11470 by the target. If GROUP_SIZE is nonzero and we're performing BB
11471 vectorization, make sure that the number of elements in the vector
11472 is no bigger than GROUP_SIZE. */
11475 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11476 unsigned int group_size
)
11478 /* For BB vectorization, we should always have a group size once we've
11479 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11480 are tentative requests during things like early data reference
11481 analysis and pattern recognition. */
11482 if (is_a
<bb_vec_info
> (vinfo
))
11483 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11487 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11489 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11490 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11492 /* Register the natural choice of vector type, before the group size
11493 has been applied. */
11495 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11497 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11498 try again with an explicit number of elements. */
11501 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11503 /* Start with the biggest number of units that fits within
11504 GROUP_SIZE and halve it until we find a valid vector type.
11505 Usually either the first attempt will succeed or all will
11506 fail (in the latter case because GROUP_SIZE is too small
11507 for the target), but it's possible that a target could have
11508 a hole between supported vector types.
11510 If GROUP_SIZE is not a power of 2, this has the effect of
11511 trying the largest power of 2 that fits within the group,
11512 even though the group is not a multiple of that vector size.
11513 The BB vectorizer will then try to carve up the group into
11515 unsigned int nunits
= 1 << floor_log2 (group_size
);
11518 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11519 scalar_type
, nunits
);
11522 while (nunits
> 1 && !vectype
);
11528 /* Return the vector type corresponding to SCALAR_TYPE as supported
11529 by the target. NODE, if nonnull, is the SLP tree node that will
11530 use the returned vector type. */
11533 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11535 unsigned int group_size
= 0;
11538 group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
11539 if (group_size
== 0)
11540 group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
11542 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11545 /* Function get_mask_type_for_scalar_type.
11547 Returns the mask type corresponding to a result of comparison
11548 of vectors of specified SCALAR_TYPE as supported by target.
11549 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11550 make sure that the number of elements in the vector is no bigger
11551 than GROUP_SIZE. */
11554 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11555 unsigned int group_size
)
11557 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11562 return truth_type_for (vectype
);
11565 /* Function get_same_sized_vectype
11567 Returns a vector type corresponding to SCALAR_TYPE of size
11568 VECTOR_TYPE if supported by the target. */
11571 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11573 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11574 return truth_type_for (vector_type
);
11576 poly_uint64 nunits
;
11577 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11578 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11581 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11582 scalar_type
, nunits
);
11585 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11586 would not change the chosen vector modes. */
11589 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11591 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11592 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11593 if (!VECTOR_MODE_P (*i
)
11594 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
11599 /* Function vect_is_simple_use.
11602 VINFO - the vect info of the loop or basic block that is being vectorized.
11603 OPERAND - operand in the loop or bb.
11605 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11606 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11607 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11608 the definition could be anywhere in the function
11609 DT - the type of definition
11611 Returns whether a stmt with OPERAND can be vectorized.
11612 For loops, supportable operands are constants, loop invariants, and operands
11613 that are defined by the current iteration of the loop. Unsupportable
11614 operands are those that are defined by a previous iteration of the loop (as
11615 is the case in reduction/induction computations).
11616 For basic blocks, supportable operands are constants and bb invariants.
11617 For now, operands defined outside the basic block are not supported. */
11620 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11621 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11623 if (def_stmt_info_out
)
11624 *def_stmt_info_out
= NULL
;
11626 *def_stmt_out
= NULL
;
11627 *dt
= vect_unknown_def_type
;
11629 if (dump_enabled_p ())
11631 dump_printf_loc (MSG_NOTE
, vect_location
,
11632 "vect_is_simple_use: operand ");
11633 if (TREE_CODE (operand
) == SSA_NAME
11634 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11635 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11637 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11640 if (CONSTANT_CLASS_P (operand
))
11641 *dt
= vect_constant_def
;
11642 else if (is_gimple_min_invariant (operand
))
11643 *dt
= vect_external_def
;
11644 else if (TREE_CODE (operand
) != SSA_NAME
)
11645 *dt
= vect_unknown_def_type
;
11646 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11647 *dt
= vect_external_def
;
11650 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11651 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11653 *dt
= vect_external_def
;
11656 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11657 def_stmt
= stmt_vinfo
->stmt
;
11658 switch (gimple_code (def_stmt
))
11661 case GIMPLE_ASSIGN
:
11663 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11666 *dt
= vect_unknown_def_type
;
11669 if (def_stmt_info_out
)
11670 *def_stmt_info_out
= stmt_vinfo
;
11673 *def_stmt_out
= def_stmt
;
11676 if (dump_enabled_p ())
11678 dump_printf (MSG_NOTE
, ", type of def: ");
11681 case vect_uninitialized_def
:
11682 dump_printf (MSG_NOTE
, "uninitialized\n");
11684 case vect_constant_def
:
11685 dump_printf (MSG_NOTE
, "constant\n");
11687 case vect_external_def
:
11688 dump_printf (MSG_NOTE
, "external\n");
11690 case vect_internal_def
:
11691 dump_printf (MSG_NOTE
, "internal\n");
11693 case vect_induction_def
:
11694 dump_printf (MSG_NOTE
, "induction\n");
11696 case vect_reduction_def
:
11697 dump_printf (MSG_NOTE
, "reduction\n");
11699 case vect_double_reduction_def
:
11700 dump_printf (MSG_NOTE
, "double reduction\n");
11702 case vect_nested_cycle
:
11703 dump_printf (MSG_NOTE
, "nested cycle\n");
11705 case vect_unknown_def_type
:
11706 dump_printf (MSG_NOTE
, "unknown\n");
11711 if (*dt
== vect_unknown_def_type
)
11713 if (dump_enabled_p ())
11714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11715 "Unsupported pattern.\n");
11722 /* Function vect_is_simple_use.
11724 Same as vect_is_simple_use but also determines the vector operand
11725 type of OPERAND and stores it to *VECTYPE. If the definition of
11726 OPERAND is vect_uninitialized_def, vect_constant_def or
11727 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11728 is responsible to compute the best suited vector type for the
11732 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11733 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11734 gimple
**def_stmt_out
)
11736 stmt_vec_info def_stmt_info
;
11738 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11742 *def_stmt_out
= def_stmt
;
11743 if (def_stmt_info_out
)
11744 *def_stmt_info_out
= def_stmt_info
;
11746 /* Now get a vector type if the def is internal, otherwise supply
11747 NULL_TREE and leave it up to the caller to figure out a proper
11748 type for the use stmt. */
11749 if (*dt
== vect_internal_def
11750 || *dt
== vect_induction_def
11751 || *dt
== vect_reduction_def
11752 || *dt
== vect_double_reduction_def
11753 || *dt
== vect_nested_cycle
)
11755 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11756 gcc_assert (*vectype
!= NULL_TREE
);
11757 if (dump_enabled_p ())
11758 dump_printf_loc (MSG_NOTE
, vect_location
,
11759 "vect_is_simple_use: vectype %T\n", *vectype
);
11761 else if (*dt
== vect_uninitialized_def
11762 || *dt
== vect_constant_def
11763 || *dt
== vect_external_def
)
11764 *vectype
= NULL_TREE
;
11766 gcc_unreachable ();
11772 /* Function supportable_widening_operation
11774 Check whether an operation represented by the code CODE is a
11775 widening operation that is supported by the target platform in
11776 vector form (i.e., when operating on arguments of type VECTYPE_IN
11777 producing a result of type VECTYPE_OUT).
11779 Widening operations we currently support are NOP (CONVERT), FLOAT,
11780 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11781 are supported by the target platform either directly (via vector
11782 tree-codes), or via target builtins.
11785 - CODE1 and CODE2 are codes of vector operations to be used when
11786 vectorizing the operation, if available.
11787 - MULTI_STEP_CVT determines the number of required intermediate steps in
11788 case of multi-step conversion (like char->short->int - in that case
11789 MULTI_STEP_CVT will be 1).
11790 - INTERM_TYPES contains the intermediate type required to perform the
11791 widening operation (short in the above example). */
11794 supportable_widening_operation (vec_info
*vinfo
,
11795 enum tree_code code
, stmt_vec_info stmt_info
,
11796 tree vectype_out
, tree vectype_in
,
11797 enum tree_code
*code1
, enum tree_code
*code2
,
11798 int *multi_step_cvt
,
11799 vec
<tree
> *interm_types
)
11801 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11802 class loop
*vect_loop
= NULL
;
11803 machine_mode vec_mode
;
11804 enum insn_code icode1
, icode2
;
11805 optab optab1
, optab2
;
11806 tree vectype
= vectype_in
;
11807 tree wide_vectype
= vectype_out
;
11808 enum tree_code c1
, c2
;
11810 tree prev_type
, intermediate_type
;
11811 machine_mode intermediate_mode
, prev_mode
;
11812 optab optab3
, optab4
;
11814 *multi_step_cvt
= 0;
11816 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11820 case WIDEN_MULT_EXPR
:
11821 /* The result of a vectorized widening operation usually requires
11822 two vectors (because the widened results do not fit into one vector).
11823 The generated vector results would normally be expected to be
11824 generated in the same order as in the original scalar computation,
11825 i.e. if 8 results are generated in each vector iteration, they are
11826 to be organized as follows:
11827 vect1: [res1,res2,res3,res4],
11828 vect2: [res5,res6,res7,res8].
11830 However, in the special case that the result of the widening
11831 operation is used in a reduction computation only, the order doesn't
11832 matter (because when vectorizing a reduction we change the order of
11833 the computation). Some targets can take advantage of this and
11834 generate more efficient code. For example, targets like Altivec,
11835 that support widen_mult using a sequence of {mult_even,mult_odd}
11836 generate the following vectors:
11837 vect1: [res1,res3,res5,res7],
11838 vect2: [res2,res4,res6,res8].
11840 When vectorizing outer-loops, we execute the inner-loop sequentially
11841 (each vectorized inner-loop iteration contributes to VF outer-loop
11842 iterations in parallel). We therefore don't allow to change the
11843 order of the computation in the inner-loop during outer-loop
11845 /* TODO: Another case in which order doesn't *really* matter is when we
11846 widen and then contract again, e.g. (short)((int)x * y >> 8).
11847 Normally, pack_trunc performs an even/odd permute, whereas the
11848 repack from an even/odd expansion would be an interleave, which
11849 would be significantly simpler for e.g. AVX2. */
11850 /* In any case, in order to avoid duplicating the code below, recurse
11851 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11852 are properly set up for the caller. If we fail, we'll continue with
11853 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11855 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11856 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11857 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11858 stmt_info
, vectype_out
,
11859 vectype_in
, code1
, code2
,
11860 multi_step_cvt
, interm_types
))
11862 /* Elements in a vector with vect_used_by_reduction property cannot
11863 be reordered if the use chain with this property does not have the
11864 same operation. One such an example is s += a * b, where elements
11865 in a and b cannot be reordered. Here we check if the vector defined
11866 by STMT is only directly used in the reduction statement. */
11867 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11868 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11870 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11873 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11874 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11877 case DOT_PROD_EXPR
:
11878 c1
= DOT_PROD_EXPR
;
11879 c2
= DOT_PROD_EXPR
;
11887 case VEC_WIDEN_MULT_EVEN_EXPR
:
11888 /* Support the recursion induced just above. */
11889 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11890 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11893 case WIDEN_LSHIFT_EXPR
:
11894 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11895 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11899 c1
= VEC_UNPACK_LO_EXPR
;
11900 c2
= VEC_UNPACK_HI_EXPR
;
11904 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11905 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11908 case FIX_TRUNC_EXPR
:
11909 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11910 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11914 gcc_unreachable ();
11917 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11918 std::swap (c1
, c2
);
11920 if (code
== FIX_TRUNC_EXPR
)
11922 /* The signedness is determined from output operand. */
11923 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11924 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11926 else if (CONVERT_EXPR_CODE_P (code
)
11927 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11928 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11929 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11930 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11932 /* If the input and result modes are the same, a different optab
11933 is needed where we pass in the number of units in vectype. */
11934 optab1
= vec_unpacks_sbool_lo_optab
;
11935 optab2
= vec_unpacks_sbool_hi_optab
;
11939 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11940 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11943 if (!optab1
|| !optab2
)
11946 vec_mode
= TYPE_MODE (vectype
);
11947 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11948 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11954 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11955 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11957 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11959 /* For scalar masks we may have different boolean
11960 vector types having the same QImode. Thus we
11961 add additional check for elements number. */
11962 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11963 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11967 /* Check if it's a multi-step conversion that can be done using intermediate
11970 prev_type
= vectype
;
11971 prev_mode
= vec_mode
;
11973 if (!CONVERT_EXPR_CODE_P (code
))
11976 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11977 intermediate steps in promotion sequence. We try
11978 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11980 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11981 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11983 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11984 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11986 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
11989 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
11990 TYPE_UNSIGNED (prev_type
));
11992 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11993 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11994 && intermediate_mode
== prev_mode
11995 && SCALAR_INT_MODE_P (prev_mode
))
11997 /* If the input and result modes are the same, a different optab
11998 is needed where we pass in the number of units in vectype. */
11999 optab3
= vec_unpacks_sbool_lo_optab
;
12000 optab4
= vec_unpacks_sbool_hi_optab
;
12004 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12005 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
12008 if (!optab3
|| !optab4
12009 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12010 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12011 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12012 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12013 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12014 == CODE_FOR_nothing
)
12015 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12016 == CODE_FOR_nothing
))
12019 interm_types
->quick_push (intermediate_type
);
12020 (*multi_step_cvt
)++;
12022 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12023 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12025 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12027 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12028 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12032 prev_type
= intermediate_type
;
12033 prev_mode
= intermediate_mode
;
12036 interm_types
->release ();
12041 /* Function supportable_narrowing_operation
12043 Check whether an operation represented by the code CODE is a
12044 narrowing operation that is supported by the target platform in
12045 vector form (i.e., when operating on arguments of type VECTYPE_IN
12046 and producing a result of type VECTYPE_OUT).
12048 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12049 and FLOAT. This function checks if these operations are supported by
12050 the target platform directly via vector tree-codes.
12053 - CODE1 is the code of a vector operation to be used when
12054 vectorizing the operation, if available.
12055 - MULTI_STEP_CVT determines the number of required intermediate steps in
12056 case of multi-step conversion (like int->short->char - in that case
12057 MULTI_STEP_CVT will be 1).
12058 - INTERM_TYPES contains the intermediate type required to perform the
12059 narrowing operation (short in the above example). */
12062 supportable_narrowing_operation (enum tree_code code
,
12063 tree vectype_out
, tree vectype_in
,
12064 enum tree_code
*code1
, int *multi_step_cvt
,
12065 vec
<tree
> *interm_types
)
12067 machine_mode vec_mode
;
12068 enum insn_code icode1
;
12069 optab optab1
, interm_optab
;
12070 tree vectype
= vectype_in
;
12071 tree narrow_vectype
= vectype_out
;
12073 tree intermediate_type
, prev_type
;
12074 machine_mode intermediate_mode
, prev_mode
;
12078 *multi_step_cvt
= 0;
12082 c1
= VEC_PACK_TRUNC_EXPR
;
12083 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12084 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12085 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
12086 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12087 optab1
= vec_pack_sbool_trunc_optab
;
12089 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12092 case FIX_TRUNC_EXPR
:
12093 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12094 /* The signedness is determined from output operand. */
12095 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12099 c1
= VEC_PACK_FLOAT_EXPR
;
12100 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12104 gcc_unreachable ();
12110 vec_mode
= TYPE_MODE (vectype
);
12111 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
12116 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12118 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12120 /* For scalar masks we may have different boolean
12121 vector types having the same QImode. Thus we
12122 add additional check for elements number. */
12123 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12124 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12128 if (code
== FLOAT_EXPR
)
12131 /* Check if it's a multi-step conversion that can be done using intermediate
12133 prev_mode
= vec_mode
;
12134 prev_type
= vectype
;
12135 if (code
== FIX_TRUNC_EXPR
)
12136 uns
= TYPE_UNSIGNED (vectype_out
);
12138 uns
= TYPE_UNSIGNED (vectype
);
12140 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12141 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12142 costly than signed. */
12143 if (code
== FIX_TRUNC_EXPR
&& uns
)
12145 enum insn_code icode2
;
12148 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12150 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12151 if (interm_optab
!= unknown_optab
12152 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12153 && insn_data
[icode1
].operand
[0].mode
12154 == insn_data
[icode2
].operand
[0].mode
)
12157 optab1
= interm_optab
;
12162 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12163 intermediate steps in promotion sequence. We try
12164 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12165 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12166 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12168 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12169 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12171 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12174 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12175 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12176 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12177 && intermediate_mode
== prev_mode
12178 && SCALAR_INT_MODE_P (prev_mode
))
12179 interm_optab
= vec_pack_sbool_trunc_optab
;
12182 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
12185 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12186 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12187 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12188 == CODE_FOR_nothing
))
12191 interm_types
->quick_push (intermediate_type
);
12192 (*multi_step_cvt
)++;
12194 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12196 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12198 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12199 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12203 prev_mode
= intermediate_mode
;
12204 prev_type
= intermediate_type
;
12205 optab1
= interm_optab
;
12208 interm_types
->release ();
12212 /* Generate and return a statement that sets vector mask MASK such that
12213 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12216 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
12218 tree cmp_type
= TREE_TYPE (start_index
);
12219 tree mask_type
= TREE_TYPE (mask
);
12220 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12221 cmp_type
, mask_type
,
12222 OPTIMIZE_FOR_SPEED
));
12223 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12224 start_index
, end_index
,
12225 build_zero_cst (mask_type
));
12226 gimple_call_set_lhs (call
, mask
);
12230 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12231 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12234 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12237 tree tmp
= make_ssa_name (mask_type
);
12238 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
12239 gimple_seq_add_stmt (seq
, call
);
12240 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12243 /* Try to compute the vector types required to vectorize STMT_INFO,
12244 returning true on success and false if vectorization isn't possible.
12245 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12246 take sure that the number of elements in the vectors is no bigger
12251 - Set *STMT_VECTYPE_OUT to:
12252 - NULL_TREE if the statement doesn't need to be vectorized;
12253 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12255 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12256 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12257 statement does not help to determine the overall number of units. */
12260 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12261 tree
*stmt_vectype_out
,
12262 tree
*nunits_vectype_out
,
12263 unsigned int group_size
)
12265 gimple
*stmt
= stmt_info
->stmt
;
12267 /* For BB vectorization, we should always have a group size once we've
12268 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12269 are tentative requests during things like early data reference
12270 analysis and pattern recognition. */
12271 if (is_a
<bb_vec_info
> (vinfo
))
12272 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12276 *stmt_vectype_out
= NULL_TREE
;
12277 *nunits_vectype_out
= NULL_TREE
;
12279 if (gimple_get_lhs (stmt
) == NULL_TREE
12280 /* MASK_STORE has no lhs, but is ok. */
12281 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12283 if (is_a
<gcall
*> (stmt
))
12285 /* Ignore calls with no lhs. These must be calls to
12286 #pragma omp simd functions, and what vectorization factor
12287 it really needs can't be determined until
12288 vectorizable_simd_clone_call. */
12289 if (dump_enabled_p ())
12290 dump_printf_loc (MSG_NOTE
, vect_location
,
12291 "defer to SIMD clone analysis.\n");
12292 return opt_result::success ();
12295 return opt_result::failure_at (stmt
,
12296 "not vectorized: irregular stmt.%G", stmt
);
12299 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
12300 return opt_result::failure_at (stmt
,
12301 "not vectorized: vector stmt in loop:%G",
12305 tree scalar_type
= NULL_TREE
;
12306 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12308 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12309 if (dump_enabled_p ())
12310 dump_printf_loc (MSG_NOTE
, vect_location
,
12311 "precomputed vectype: %T\n", vectype
);
12313 else if (vect_use_mask_type_p (stmt_info
))
12315 unsigned int precision
= stmt_info
->mask_precision
;
12316 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12317 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12319 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12320 " data-type %T\n", scalar_type
);
12321 if (dump_enabled_p ())
12322 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12326 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12327 scalar_type
= TREE_TYPE (DR_REF (dr
));
12328 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12329 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12331 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12333 if (dump_enabled_p ())
12336 dump_printf_loc (MSG_NOTE
, vect_location
,
12337 "get vectype for scalar type (group size %d):"
12338 " %T\n", group_size
, scalar_type
);
12340 dump_printf_loc (MSG_NOTE
, vect_location
,
12341 "get vectype for scalar type: %T\n", scalar_type
);
12343 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12345 return opt_result::failure_at (stmt
,
12347 " unsupported data-type %T\n",
12350 if (dump_enabled_p ())
12351 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12353 *stmt_vectype_out
= vectype
;
12355 /* Don't try to compute scalar types if the stmt produces a boolean
12356 vector; use the existing vector type instead. */
12357 tree nunits_vectype
= vectype
;
12358 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12360 /* The number of units is set according to the smallest scalar
12361 type (or the largest vector size, but we only support one
12362 vector size per vectorization). */
12363 HOST_WIDE_INT dummy
;
12364 scalar_type
= vect_get_smallest_scalar_type (stmt_info
, &dummy
, &dummy
);
12365 if (scalar_type
!= TREE_TYPE (vectype
))
12367 if (dump_enabled_p ())
12368 dump_printf_loc (MSG_NOTE
, vect_location
,
12369 "get vectype for smallest scalar type: %T\n",
12371 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12373 if (!nunits_vectype
)
12374 return opt_result::failure_at
12375 (stmt
, "not vectorized: unsupported data-type %T\n",
12377 if (dump_enabled_p ())
12378 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12383 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12384 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)));
12386 if (dump_enabled_p ())
12388 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12389 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12390 dump_printf (MSG_NOTE
, "\n");
12393 *nunits_vectype_out
= nunits_vectype
;
12394 return opt_result::success ();