1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
53 /* For lang_hooks.types.type_for_mode. */
54 #include "langhooks.h"
56 /* Says whether a statement is a load, a store of a vectorized statement
57 result, or a store of an invariant value. */
58 enum vec_load_store_type
{
64 /* Return the vectorized type for the given statement. */
67 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
69 return STMT_VINFO_VECTYPE (stmt_info
);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
75 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
77 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
78 basic_block bb
= gimple_bb (stmt
);
79 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
85 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
87 return (bb
->loop_father
== loop
->inner
);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
95 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
96 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
97 int misalign
, enum vect_cost_model_location where
)
99 if ((kind
== vector_load
|| kind
== unaligned_load
)
100 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
101 kind
= vector_gather_load
;
102 if ((kind
== vector_store
|| kind
== unaligned_store
)
103 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
104 kind
= vector_scatter_store
;
107 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
108 stmt_info_for_cost si
= { count
, kind
,
109 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
111 body_cost_vec
->safe_push (si
);
113 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
116 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
117 count
, kind
, stmt_info
, misalign
, where
);
120 /* Return a variable of type ELEM_TYPE[NELEMS]. */
123 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
125 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
129 /* ARRAY is an array of vectors created by create_vector_array.
130 Return an SSA_NAME for the vector in index N. The reference
131 is part of the vectorization of STMT and the vector is associated
132 with scalar destination SCALAR_DEST. */
135 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
136 tree array
, unsigned HOST_WIDE_INT n
)
138 tree vect_type
, vect
, vect_name
, array_ref
;
141 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
142 vect_type
= TREE_TYPE (TREE_TYPE (array
));
143 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
144 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
145 build_int_cst (size_type_node
, n
),
146 NULL_TREE
, NULL_TREE
);
148 new_stmt
= gimple_build_assign (vect
, array_ref
);
149 vect_name
= make_ssa_name (vect
, new_stmt
);
150 gimple_assign_set_lhs (new_stmt
, vect_name
);
151 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Emit code to store SSA_NAME VECT in index N of the array.
158 The store is part of the vectorization of STMT. */
161 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
162 tree array
, unsigned HOST_WIDE_INT n
)
167 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
168 build_int_cst (size_type_node
, n
),
169 NULL_TREE
, NULL_TREE
);
171 new_stmt
= gimple_build_assign (array_ref
, vect
);
172 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
175 /* PTR is a pointer to an array of type TYPE. Return a representation
176 of *PTR. The memory reference replaces those in FIRST_DR
180 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
184 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
185 /* Arrays have the same alignment as their type. */
186 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
190 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192 /* Function vect_mark_relevant.
194 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
197 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
198 enum vect_relevant relevant
, bool live_p
)
200 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
201 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
202 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
203 gimple
*pattern_stmt
;
205 if (dump_enabled_p ())
207 dump_printf_loc (MSG_NOTE
, vect_location
,
208 "mark relevant %d, live %d: ", relevant
, live_p
);
209 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_info
= vinfo_for_stmt (pattern_stmt
);
230 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
231 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
232 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
264 if (!is_gimple_assign (stmt
))
267 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
269 enum vect_def_type dt
= vect_uninitialized_def
;
271 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
275 "use not simple.\n");
279 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
285 /* Function vect_stmt_relevant_p.
287 Return true if STMT in loop that is represented by LOOP_VINFO is
288 "relevant for vectorization".
290 A stmt is considered "relevant for vectorization" if:
291 - it has uses outside the loop.
292 - it has vdefs (it alters memory).
293 - control stmts in the loop (except for the exit condition).
295 CHECKME: what other side effects would the vectorizer allow? */
298 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
299 enum vect_relevant
*relevant
, bool *live_p
)
301 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
303 imm_use_iterator imm_iter
;
307 *relevant
= vect_unused_in_scope
;
310 /* cond stmt other than loop exit cond. */
311 if (is_ctrl_stmt (stmt
)
312 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
313 != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt
)
319 && !gimple_clobber_p (stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (dump_enabled_p ())
336 dump_printf_loc (MSG_NOTE
, vect_location
,
337 "vec_stmt_relevant_p: used out of loop.\n");
339 if (is_gimple_debug (USE_STMT (use_p
)))
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
374 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info
))
382 /* STMT has a data_ref. FORNOW this means that its of one of
386 (This should have been verified in analyze_data_refs).
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
395 if (!gimple_assign_copy_p (stmt
))
397 if (is_gimple_call (stmt
)
398 && gimple_call_internal_p (stmt
))
399 switch (gimple_call_internal_fn (stmt
))
402 operand
= gimple_call_arg (stmt
, 3);
407 operand
= gimple_call_arg (stmt
, 2);
417 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
419 operand
= gimple_assign_rhs1 (stmt
);
420 if (TREE_CODE (operand
) != SSA_NAME
)
431 Function process_use.
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT cause it had already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
458 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
459 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
462 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
463 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
464 stmt_vec_info dstmt_vinfo
;
465 basic_block bb
, def_bb
;
467 enum vect_def_type dt
;
469 /* case 1: we are only interested in uses that need to be vectorized. Uses
470 that are used for address computation are not considered relevant. */
471 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
474 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
478 "not vectorized: unsupported use in stmt.\n");
482 if (!def_stmt
|| gimple_nop_p (def_stmt
))
485 def_bb
= gimple_bb (def_stmt
);
486 if (!flow_bb_inside_loop_p (loop
, def_bb
))
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
493 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
494 DEF_STMT must have already been processed, because this should be the
495 only way that STMT, which is a reduction-phi, was put in the worklist,
496 as there should be no other uses for DEF_STMT in the loop. So we just
497 check that everything is as expected, and we are done. */
498 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
499 bb
= gimple_bb (stmt
);
500 if (gimple_code (stmt
) == GIMPLE_PHI
501 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
502 && gimple_code (def_stmt
) != GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
504 && bb
->loop_father
== def_bb
->loop_father
)
506 if (dump_enabled_p ())
507 dump_printf_loc (MSG_NOTE
, vect_location
,
508 "reduc-stmt defining reduc-phi in the same nest.\n");
509 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
510 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
511 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
512 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
513 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
517 /* case 3a: outer-loop stmt defining an inner-loop stmt:
518 outer-loop-header-bb:
524 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
526 if (dump_enabled_p ())
527 dump_printf_loc (MSG_NOTE
, vect_location
,
528 "outer-loop def-stmt defining inner-loop stmt.\n");
532 case vect_unused_in_scope
:
533 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
534 vect_used_in_scope
: vect_unused_in_scope
;
537 case vect_used_in_outer_by_reduction
:
538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
539 relevant
= vect_used_by_reduction
;
542 case vect_used_in_outer
:
543 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
544 relevant
= vect_used_in_scope
;
547 case vect_used_in_scope
:
555 /* case 3b: inner-loop stmt defining an outer-loop stmt:
556 outer-loop-header-bb:
560 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_NOTE
, vect_location
,
566 "inner-loop def-stmt defining outer-loop stmt.\n");
570 case vect_unused_in_scope
:
571 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
572 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
573 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
576 case vect_used_by_reduction
:
577 case vect_used_only_live
:
578 relevant
= vect_used_in_outer_by_reduction
;
581 case vect_used_in_scope
:
582 relevant
= vect_used_in_outer
;
589 /* We are also not interested in uses on loop PHI backedges that are
590 inductions. Otherwise we'll needlessly vectorize the IV increment
591 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 else if (gimple_code (stmt
) == GIMPLE_PHI
594 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
595 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
596 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE
, vect_location
,
601 "induction value on backedge.\n");
606 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
611 /* Function vect_mark_stmts_to_be_vectorized.
613 Not all stmts in the loop need to be vectorized. For example:
622 Stmt 1 and 3 do not need to be vectorized, because loop control and
623 addressing of vectorized data-refs are handled differently.
625 This pass detects such stmts. */
628 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
630 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
631 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
632 unsigned int nbbs
= loop
->num_nodes
;
633 gimple_stmt_iterator si
;
636 stmt_vec_info stmt_vinfo
;
640 enum vect_relevant relevant
;
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 "=== vect_mark_stmts_to_be_vectorized ===\n");
646 auto_vec
<gimple
*, 64> worklist
;
648 /* 1. Init worklist. */
649 for (i
= 0; i
< nbbs
; i
++)
652 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
658 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
661 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
662 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
664 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
666 stmt
= gsi_stmt (si
);
667 if (dump_enabled_p ())
669 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
670 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
673 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
674 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
678 /* 2. Process_worklist */
679 while (worklist
.length () > 0)
684 stmt
= worklist
.pop ();
685 if (dump_enabled_p ())
687 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
688 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
691 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
692 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 stmt_vinfo
= vinfo_for_stmt (stmt
);
695 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
697 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
698 propagated as is to the DEF_STMTs of its USEs.
700 One exception is when STMT has been identified as defining a reduction
701 variable; in this case we set the relevance to vect_used_by_reduction.
702 This is because we distinguish between two kinds of relevant stmts -
703 those that are used by a reduction computation, and those that are
704 (also) used by a regular computation. This allows us later on to
705 identify stmts that are used solely by a reduction, and therefore the
706 order of the results that they produce does not have to be kept. */
708 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
710 case vect_reduction_def
:
711 gcc_assert (relevant
!= vect_unused_in_scope
);
712 if (relevant
!= vect_unused_in_scope
713 && relevant
!= vect_used_in_scope
714 && relevant
!= vect_used_by_reduction
715 && relevant
!= vect_used_only_live
)
717 if (dump_enabled_p ())
718 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
719 "unsupported use of reduction.\n");
724 case vect_nested_cycle
:
725 if (relevant
!= vect_unused_in_scope
726 && relevant
!= vect_used_in_outer_by_reduction
727 && relevant
!= vect_used_in_outer
)
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
731 "unsupported use of nested cycle.\n");
737 case vect_double_reduction_def
:
738 if (relevant
!= vect_unused_in_scope
739 && relevant
!= vect_used_by_reduction
740 && relevant
!= vect_used_only_live
)
742 if (dump_enabled_p ())
743 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
744 "unsupported use of double reduction.\n");
754 if (is_pattern_stmt_p (stmt_vinfo
))
756 /* Pattern statements are not inserted into the code, so
757 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
758 have to scan the RHS or function arguments instead. */
759 if (is_gimple_assign (stmt
))
761 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
762 tree op
= gimple_assign_rhs1 (stmt
);
765 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
767 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
768 relevant
, &worklist
, false)
769 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
770 relevant
, &worklist
, false))
774 for (; i
< gimple_num_ops (stmt
); i
++)
776 op
= gimple_op (stmt
, i
);
777 if (TREE_CODE (op
) == SSA_NAME
778 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
783 else if (is_gimple_call (stmt
))
785 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
787 tree arg
= gimple_call_arg (stmt
, i
);
788 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
795 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
797 tree op
= USE_FROM_PTR (use_p
);
798 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
803 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
805 gather_scatter_info gs_info
;
806 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
808 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
812 } /* while worklist */
818 /* Function vect_model_simple_cost.
820 Models cost for simple operations, i.e. those that only emit ncopies of a
821 single op. Right now, this does not account for multiple insns that could
822 be generated for the single vector op. We will handle that shortly. */
825 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
826 enum vect_def_type
*dt
,
828 stmt_vector_for_cost
*prologue_cost_vec
,
829 stmt_vector_for_cost
*body_cost_vec
)
832 int inside_cost
= 0, prologue_cost
= 0;
834 /* The SLP costs were already calculated during SLP tree build. */
835 if (PURE_SLP_STMT (stmt_info
))
838 /* Cost the "broadcast" of a scalar operand in to a vector operand.
839 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 for (i
= 0; i
< ndts
; i
++)
842 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
843 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
844 stmt_info
, 0, vect_prologue
);
846 /* Pass the inside-of-loop statements to the target-specific cost model. */
847 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
848 stmt_info
, 0, vect_body
);
850 if (dump_enabled_p ())
851 dump_printf_loc (MSG_NOTE
, vect_location
,
852 "vect_model_simple_cost: inside_cost = %d, "
853 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
857 /* Model cost for type demotion and promotion operations. PWR is normally
858 zero for single-step promotions and demotions. It will be one if
859 two-step promotion/demotion is required, and so on. Each additional
860 step doubles the number of instructions required. */
863 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
864 enum vect_def_type
*dt
, int pwr
)
867 int inside_cost
= 0, prologue_cost
= 0;
868 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
869 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
870 void *target_cost_data
;
872 /* The SLP costs were already calculated during SLP tree build. */
873 if (PURE_SLP_STMT (stmt_info
))
877 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
879 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
881 for (i
= 0; i
< pwr
+ 1; i
++)
883 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
885 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
886 vec_promote_demote
, stmt_info
, 0,
890 /* FORNOW: Assuming maximum 2 args per stmts. */
891 for (i
= 0; i
< 2; i
++)
892 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
893 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
894 stmt_info
, 0, vect_prologue
);
896 if (dump_enabled_p ())
897 dump_printf_loc (MSG_NOTE
, vect_location
,
898 "vect_model_promotion_demotion_cost: inside_cost = %d, "
899 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
902 /* Function vect_model_store_cost
904 Models cost for stores. In the case of grouped accesses, one access
905 has the overhead of the grouped access attributed to it. */
908 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
909 vect_memory_access_type memory_access_type
,
910 enum vect_def_type dt
, slp_tree slp_node
,
911 stmt_vector_for_cost
*prologue_cost_vec
,
912 stmt_vector_for_cost
*body_cost_vec
)
914 unsigned int inside_cost
= 0, prologue_cost
= 0;
915 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
916 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
917 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
919 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
920 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
921 stmt_info
, 0, vect_prologue
);
923 /* Grouped stores update all elements in the group at once,
924 so we want the DR for the first statement. */
925 if (!slp_node
&& grouped_access_p
)
927 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
928 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
941 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
943 /* Uses a high and low interleave or shuffle operations for each
945 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
946 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
947 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
948 stmt_info
, 0, vect_body
);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE
, vect_location
,
952 "vect_model_store_cost: strided group_size = %d .\n",
956 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
957 /* Costs of the stores. */
958 if (memory_access_type
== VMAT_ELEMENTWISE
959 || memory_access_type
== VMAT_GATHER_SCATTER
)
960 /* N scalar stores plus extracting the elements. */
961 inside_cost
+= record_stmt_cost (body_cost_vec
,
962 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
963 scalar_store
, stmt_info
, 0, vect_body
);
965 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
967 if (memory_access_type
== VMAT_ELEMENTWISE
968 || memory_access_type
== VMAT_STRIDED_SLP
)
969 inside_cost
+= record_stmt_cost (body_cost_vec
,
970 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
971 vec_to_scalar
, stmt_info
, 0, vect_body
);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE
, vect_location
,
975 "vect_model_store_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
980 /* Calculate cost of DR's memory access. */
982 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
983 unsigned int *inside_cost
,
984 stmt_vector_for_cost
*body_cost_vec
)
986 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
987 gimple
*stmt
= DR_STMT (dr
);
988 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
990 switch (alignment_support_scheme
)
994 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
995 vector_store
, stmt_info
, 0,
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE
, vect_location
,
1000 "vect_model_store_cost: aligned.\n");
1004 case dr_unaligned_supported
:
1006 /* Here, we assign an additional cost for the unaligned store. */
1007 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1008 unaligned_store
, stmt_info
,
1009 DR_MISALIGNMENT (dr
), vect_body
);
1010 if (dump_enabled_p ())
1011 dump_printf_loc (MSG_NOTE
, vect_location
,
1012 "vect_model_store_cost: unaligned supported by "
1017 case dr_unaligned_unsupported
:
1019 *inside_cost
= VECT_MAX_COST
;
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1023 "vect_model_store_cost: unsupported access.\n");
1033 /* Function vect_model_load_cost
1035 Models cost for loads. In the case of grouped accesses, one access has
1036 the overhead of the grouped access attributed to it. Since unaligned
1037 accesses are supported for loads, we also account for the costs of the
1038 access scheme chosen. */
1041 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1042 vect_memory_access_type memory_access_type
,
1044 stmt_vector_for_cost
*prologue_cost_vec
,
1045 stmt_vector_for_cost
*body_cost_vec
)
1047 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1048 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1049 unsigned int inside_cost
= 0, prologue_cost
= 0;
1050 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1052 /* Grouped loads read all elements in the group at once,
1053 so we want the DR for the first statement. */
1054 if (!slp_node
&& grouped_access_p
)
1056 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1057 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1060 /* True if we should include any once-per-group costs as well as
1061 the cost of the statement itself. For SLP we only get called
1062 once per group anyhow. */
1063 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1065 /* We assume that the cost of a single load-lanes instruction is
1066 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1067 access is instead being provided by a load-and-permute operation,
1068 include the cost of the permutes. */
1070 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1072 /* Uses an even and odd extract operations or shuffle operations
1073 for each needed permute. */
1074 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1075 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1076 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1077 stmt_info
, 0, vect_body
);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE
, vect_location
,
1081 "vect_model_load_cost: strided group_size = %d .\n",
1085 /* The loads themselves. */
1086 if (memory_access_type
== VMAT_ELEMENTWISE
1087 || memory_access_type
== VMAT_GATHER_SCATTER
)
1089 /* N scalar loads plus gathering them into a vector. */
1090 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1091 inside_cost
+= record_stmt_cost (body_cost_vec
,
1092 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1093 scalar_load
, stmt_info
, 0, vect_body
);
1096 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1097 &inside_cost
, &prologue_cost
,
1098 prologue_cost_vec
, body_cost_vec
, true);
1099 if (memory_access_type
== VMAT_ELEMENTWISE
1100 || memory_access_type
== VMAT_STRIDED_SLP
)
1101 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1102 stmt_info
, 0, vect_body
);
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE
, vect_location
,
1106 "vect_model_load_cost: inside_cost = %d, "
1107 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): this excerpt is garbled by extraction -- statements are
   split across lines and some lines (braces, the dr_aligned case label,
   string continuations) are missing.  Comments annotate visible logic only.

   Accumulate into *INSIDE_COST / *PROLOGUE_COST the cost of a vectorized
   load of data reference DR, replicated NCOPIES times, depending on which
   alignment scheme the target supports for DR.  */
1111 /* Calculate cost of DR's memory access. */
1113 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1114 bool add_realign_cost
, unsigned int *inside_cost
,
1115 unsigned int *prologue_cost
,
1116 stmt_vector_for_cost
*prologue_cost_vec
,
1117 stmt_vector_for_cost
*body_cost_vec
,
1118 bool record_prologue_costs
)
/* Ask the target how (or whether) it can handle DR's alignment; each
   scheme is costed differently in the switch below.  */
1120 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1121 gimple
*stmt
= DR_STMT (dr
);
1122 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1124 switch (alignment_support_scheme
)
/* Aligned access (case label missing in this excerpt): one vector_load
   per copy, no extra penalty.  */
1128 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1129 stmt_info
, 0, vect_body
);
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE
, vect_location
,
1133 "vect_model_load_cost: aligned.\n");
1137 case dr_unaligned_supported
:
1139 /* Here, we assign an additional cost for the unaligned load. */
1140 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1141 unaligned_load
, stmt_info
,
1142 DR_MISALIGNMENT (dr
), vect_body
);
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE
, vect_location
,
1146 "vect_model_load_cost: unaligned supported by "
1151 case dr_explicit_realign
:
/* Explicit realignment: two vector loads plus one permute per copy.  */
1153 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1154 vector_load
, stmt_info
, 0, vect_body
);
1155 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1156 vec_perm
, stmt_info
, 0, vect_body
);
1158 /* FIXME: If the misalignment remains fixed across the iterations of
1159 the containing loop, the following cost should be added to the
/* One extra statement to build the realignment mask when the target
   provides a builtin_mask_for_load hook.  */
1161 if (targetm
.vectorize
.builtin_mask_for_load
)
1162 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1163 stmt_info
, 0, vect_body
);
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE
, vect_location
,
1167 "vect_model_load_cost: explicit realign\n");
1171 case dr_explicit_realign_optimized
:
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE
, vect_location
,
1175 "vect_model_load_cost: unaligned software "
1178 /* Unaligned software pipeline has a load of an address, an initial
1179 load, and possibly a mask operation to "prime" the loop. However,
1180 if this is an access in a group of loads, which provide grouped
1181 access, then the above cost should only be considered for one
1182 access in the group. Inside the loop, there is a load op
1183 and a realignment op. */
/* Prologue cost is only charged when the caller asked for it (one-time
   setup for the software pipeline).  */
1185 if (add_realign_cost
&& record_prologue_costs
)
1187 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1188 vector_stmt
, stmt_info
,
1190 if (targetm
.vectorize
.builtin_mask_for_load
)
1191 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1192 vector_stmt
, stmt_info
,
/* Steady-state body: one load plus one permute per copy.  */
1196 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1197 stmt_info
, 0, vect_body
);
1198 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1199 stmt_info
, 0, vect_body
);
1201 if (dump_enabled_p ())
1202 dump_printf_loc (MSG_NOTE
, vect_location
,
1203 "vect_model_load_cost: explicit realign optimized"
1209 case dr_unaligned_unsupported
:
/* Unsupported alignment: make the cost prohibitive so this access
   pattern is rejected by the cost model.  */
1211 *inside_cost
= VECT_MAX_COST
;
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1215 "vect_model_load_cost: unsupported access.\n");
/* NOTE(review): excerpt garbled by extraction; some lines (braces,
   the GSI short-circuit condition) are missing.  Visible logic:
   if a GSI is given, insert NEW_STMT there; otherwise, for a loop
   vectorization insert on the loop preheader edge, and for a basic-block
   vectorization insert after the labels of the block.  */
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225 the loop preheader for the vectorized stmt STMT. */
1228 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
/* GSI supplied: insert right there via the common helper.  */
1231 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1234 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1235 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1239 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* presumably the nested-loop case picks the outer loop's preheader --
   the line selecting LOOP->inner vs LOOP is missing here; confirm
   against the full source.  */
1243 if (nested_in_vect_loop_p (loop
, stmt
))
1246 pe
= loop_preheader_edge (loop
);
1247 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
/* Inserting on a preheader edge must not split it into a new block.  */
1248 gcc_assert (!new_bb
);
/* Basic-block (SLP) vectorization: insert after the labels of the
   region's block.  */
1252 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1254 gimple_stmt_iterator gsi_bb_start
;
1256 gcc_assert (bb_vinfo
);
1257 bb
= BB_VINFO_BB (bb_vinfo
);
1258 gsi_bb_start
= gsi_after_labels (bb
);
1259 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1263 if (dump_enabled_p ())
1265 dump_printf_loc (MSG_NOTE
, vect_location
,
1266 "created new init_stmt: ");
1267 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
/* NOTE(review): excerpt garbled by extraction; braces, some declarations
   (new_temp, init_stmt) and the return statement are missing from view.
   Comments below annotate only the visible conversion logic.  */
1271 /* Function vect_init_vector.
1273 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1275 vector type a vector with all elements equal to VAL is created first.
1276 Place the initialization at BSI if it is not NULL. Otherwise, place the
1277 initialization at the loop preheader.
1278 Return the DEF of INIT_STMT.
1279 It will be used in the vectorization of STMT. */
1282 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1287 /* We abuse this function to push sth to a SSA name with initial 'val'. */
/* VAL's type differs from TYPE: TYPE must be a vector and VAL a scalar
   that first needs converting to TYPE's element type.  */
1288 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1290 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1291 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1293 /* Scalar boolean value should be transformed into
1294 all zeros or all ones value before building a vector. */
1295 if (VECTOR_BOOLEAN_TYPE_P (type
))
1297 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1298 tree false_val
= build_zero_cst (TREE_TYPE (type
));
/* Constants fold directly; non-constants get a runtime COND_EXPR
   selecting all-ones vs all-zeros.  */
1300 if (CONSTANT_CLASS_P (val
))
1301 val
= integer_zerop (val
) ? false_val
: true_val
;
1304 new_temp
= make_ssa_name (TREE_TYPE (type
));
1305 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1306 val
, true_val
, false_val
);
1307 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Non-boolean constant: fold-convert to the element type.  */
1311 else if (CONSTANT_CLASS_P (val
))
1312 val
= fold_convert (TREE_TYPE (type
), val
);
/* Non-constant scalar: emit an explicit conversion statement --
   VIEW_CONVERT_EXPR for non-integral sources, NOP_EXPR otherwise.  */
1315 new_temp
= make_ssa_name (TREE_TYPE (type
));
1316 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1317 init_stmt
= gimple_build_assign (new_temp
,
1318 fold_build1 (VIEW_CONVERT_EXPR
,
1322 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1323 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Broadcast the (now element-typed) scalar into a full vector.  */
1327 val
= build_vector_from_val (type
, val
);
/* Materialize the vector value in a fresh SSA name and insert the
   initialization at GSI or in the preheader.  */
1330 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1331 init_stmt
= gimple_build_assign (new_temp
, val
);
1332 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* NOTE(review): excerpt garbled by extraction; the switch keyword, some
   case bodies, gcc_unreachable calls and returns are missing from view.
   Visible logic dispatches on the def-type DT of DEF_STMT and extracts
   the LHS of the corresponding vectorized statement.  */
1336 /* Function vect_get_vec_def_for_operand_1.
1338 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339 DT that will be used in the vectorized stmt. */
1342 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1346 stmt_vec_info def_stmt_info
= NULL
;
1350 /* operand is a constant or a loop invariant. */
1351 case vect_constant_def
:
1352 case vect_external_def
:
1353 /* Code should use vect_get_vec_def_for_operand. */
1356 /* operand is defined inside the loop. */
1357 case vect_internal_def
:
1359 /* Get the def from the vectorized stmt. */
1360 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1362 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1363 /* Get vectorized pattern statement. */
/* If the def was replaced by a pattern stmt and is itself not relevant,
   use the related pattern stmt's vectorized form instead.  */
1365 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1366 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1367 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1369 gcc_assert (vec_stmt
);
/* Pull the defined value out of whichever stmt kind produced it.  */
1370 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1371 vec_oprnd
= PHI_RESULT (vec_stmt
);
1372 else if (is_gimple_call (vec_stmt
))
1373 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1375 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1379 /* operand is defined by a loop header phi. */
1380 case vect_reduction_def
:
1381 case vect_double_reduction_def
:
1382 case vect_nested_cycle
:
1383 case vect_induction_def
:
/* Loop-header defs must come from a PHI node.  */
1385 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1387 /* Get the def from the vectorized stmt. */
1388 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1389 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1390 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1391 vec_oprnd
= PHI_RESULT (vec_stmt
);
1393 vec_oprnd
= gimple_get_lhs (vec_stmt
);
/* NOTE(review): excerpt garbled by extraction; braces and some
   declarations (def_stmt, vector_type, is_simple_use) are missing
   from view.  Comments annotate visible logic only.  */
1403 /* Function vect_get_vec_def_for_operand.
1405 OP is an operand in STMT. This function returns a (vector) def that will be
1406 used in the vectorized stmt for STMT.
1408 In the case that OP is an SSA_NAME which is defined in the loop, then
1409 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1411 In case OP is an invariant or constant, a new stmt that creates a vector def
1412 needs to be introduced. VECTYPE may be used to specify a required type for
1413 vector invariant. */
1416 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1419 enum vect_def_type dt
;
1421 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1422 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1424 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE
, vect_location
,
1427 "vect_get_vec_def_for_operand: ");
1428 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1429 dump_printf (MSG_NOTE
, "\n");
/* Classify OP: constant, external, or defined inside the loop.  */
1432 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1433 gcc_assert (is_simple_use
);
1434 if (def_stmt
&& dump_enabled_p ())
1436 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1437 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
/* Invariant operand: build a broadcast vector via vect_init_vector.  */
1440 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1442 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
/* Pick the vector type: caller-supplied VECTYPE wins; scalar booleans
   feeding a boolean vector stmt get a same-sized truth vector;
   otherwise derive from OP's scalar type.  */
1446 vector_type
= vectype
;
1447 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1448 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1449 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1451 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1453 gcc_assert (vector_type
);
1454 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
/* Loop-internal def: fetch the LHS of the already-vectorized def stmt.  */
1457 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
/* NOTE(review): excerpt garbled by extraction; braces, the "return
   vec_oprnd" lines and part of the comment block are missing from view.  */
1461 /* Function vect_get_vec_def_for_stmt_copy
1463 Return a vector-def for an operand. This function is used when the
1464 vectorized stmt to be created (by the caller to this function) is a "copy"
1465 created in case the vectorized result cannot fit in one vector, and several
1466 copies of the vector-stmt are required. In this case the vector-def is
1467 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468 of the stmt that defines VEC_OPRND.
1469 DT is the type of the vector def VEC_OPRND.
1472 In case the vectorization factor (VF) is bigger than the number
1473 of elements that can fit in a vectype (nunits), we have to generate
1474 more than one vector stmt to vectorize the scalar stmt. This situation
1475 arises when there are multiple data-types operated upon in the loop; the
1476 smallest data-type determines the VF, and as a result, when vectorizing
1477 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478 vector stmt (each computing a vector of 'nunits' results, and together
1479 computing 'VF' results in each iteration). This function is called when
1480 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481 which VF=16 and nunits=4, so the number of copies required is 4):
1483 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1485 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1486 VS1.1: vx.1 = memref1 VS1.2
1487 VS1.2: vx.2 = memref2 VS1.3
1488 VS1.3: vx.3 = memref3
1490 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1491 VSnew.1: vz1 = vx.1 + ... VSnew.2
1492 VSnew.2: vz2 = vx.2 + ... VSnew.3
1493 VSnew.3: vz3 = vx.3 + ...
1495 The vectorization of S1 is explained in vectorizable_load.
1496 The vectorization of S2:
1497 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498 the function 'vect_get_vec_def_for_operand' is called to
1499 get the relevant vector-def for each operand of S2. For operand x it
1500 returns the vector-def 'vx.0'.
1502 To create the remaining copies of the vector-stmt (VSnew.j), this
1503 function is called to get the relevant vector-def for each operand. It is
1504 obtained from the respective VS1.j stmt, which is recorded in the
1505 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1507 For example, to obtain the vector-def 'vx.1' in order to create the
1508 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1510 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511 and return its def ('vx.1').
1512 Overall, to create the above sequence this function will be called 3 times:
1513 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1520 gimple
*vec_stmt_for_operand
;
1521 stmt_vec_info def_stmt_info
;
1523 /* Do nothing; can reuse same def. */
/* Invariants are identical across copies, so the same def is valid.  */
1524 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Step from VEC_OPRND's defining stmt to the next copy in the
   RELATED_STMT chain and return that copy's result.  */
1527 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1528 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1529 gcc_assert (def_stmt_info
);
1530 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1531 gcc_assert (vec_stmt_for_operand
);
1532 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1533 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1535 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
/* NOTE(review): excerpt garbled by extraction; braces are missing from
   view.  Pops the previous-copy def off each operand vector, advances
   it one copy via vect_get_vec_def_for_stmt_copy, and pushes it back,
   so each vector always holds the def for the current copy.  */
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1545 vec
<tree
> *vec_oprnds0
,
1546 vec
<tree
> *vec_oprnds1
)
/* Advance operand 0 to the next copy in its RELATED_STMT chain.  */
1548 tree vec_oprnd
= vec_oprnds0
->pop ();
1550 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1551 vec_oprnds0
->quick_push (vec_oprnd
);
/* Operand 1 is optional; only advance it if present and non-empty.  */
1553 if (vec_oprnds1
&& vec_oprnds1
->length ())
1555 vec_oprnd
= vec_oprnds1
->pop ();
1556 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1557 vec_oprnds1
->quick_push (vec_oprnd
);
/* NOTE(review): excerpt garbled by extraction; the SLP/non-SLP branch
   structure, braces, and the slp_node parameter line are partly missing
   from view.  Visible logic: for SLP, collect defs for up to two
   operands via vect_get_slp_defs; otherwise create a single def per
   operand via vect_get_vec_def_for_operand.  */
1562 /* Get vectorized definitions for OP0 and OP1. */
1565 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1566 vec
<tree
> *vec_oprnds0
,
1567 vec
<tree
> *vec_oprnds1
,
/* SLP path: gather the one or two scalar operands and ask the SLP
   machinery for all their vector defs at once.  */
1572 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1573 auto_vec
<tree
> ops (nops
);
1574 auto_vec
<vec
<tree
> > vec_defs (nops
);
1576 ops
.quick_push (op0
);
1578 ops
.quick_push (op1
);
1580 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1582 *vec_oprnds0
= vec_defs
[0];
1584 *vec_oprnds1
= vec_defs
[1];
/* Non-SLP path: exactly one vector def per operand.  */
1590 vec_oprnds0
->create (1);
1591 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1592 vec_oprnds0
->quick_push (vec_oprnd
);
1596 vec_oprnds1
->create (1);
1597 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1598 vec_oprnds1
->quick_push (vec_oprnd
);
/* NOTE(review): excerpt garbled by extraction; braces are missing from
   view.  Inserts VEC_STMT before *GSI, maintains virtual SSA operands,
   registers stmt_vec_info, copies location, and preserves EH region
   membership.  */
1604 /* Function vect_finish_stmt_generation.
1606 Insert a new stmt. */
1609 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1610 gimple_stmt_iterator
*gsi
)
1612 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1613 vec_info
*vinfo
= stmt_info
->vinfo
;
1615 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
/* If inserting before an existing stmt with memory ops, inherit its
   virtual use so the virtual SSA chain stays consistent.  */
1617 if (!gsi_end_p (*gsi
)
1618 && gimple_has_mem_ops (vec_stmt
))
1620 gimple
*at_stmt
= gsi_stmt (*gsi
);
1621 tree vuse
= gimple_vuse (at_stmt
);
1622 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1624 tree vdef
= gimple_vdef (at_stmt
);
1625 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1626 /* If we have an SSA vuse and insert a store, update virtual
1627 SSA form to avoid triggering the renamer. Do so only
1628 if we can easily see all uses - which is what almost always
1629 happens with the way vectorized stmts are inserted. */
/* A new store needs its own vdef: create one and rewire AT_STMT's
   vuse to it, splicing VEC_STMT into the virtual def chain.  */
1630 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1631 && ((is_gimple_assign (vec_stmt
)
1632 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1633 || (is_gimple_call (vec_stmt
)
1634 && !(gimple_call_flags (vec_stmt
)
1635 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1637 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1638 gimple_set_vdef (vec_stmt
, new_vdef
);
1639 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1643 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Register vectorizer bookkeeping for the new stmt.  */
1645 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1647 if (dump_enabled_p ())
1649 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1650 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
/* Carry over the scalar stmt's source location for diagnostics.  */
1653 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1655 /* While EH edges will generally prevent vectorization, stmt might
1656 e.g. be in a must-not-throw region. Ensure newly created stmts
1657 that could throw are part of the same region. */
1658 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1659 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1660 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
/* NOTE(review): excerpt garbled by extraction; braces, the return type,
   the "return ifn" and "return IFN_LAST" lines are missing from view.  */
1663 /* We want to vectorize a call to combined function CFN with function
1664 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1665 as the types of all inputs. Check whether this is possible using
1666 an internal function, returning its code if so or IFN_LAST if not. */
1669 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1670 tree vectype_out
, tree vectype_in
)
/* Map CFN to an internal fn, either directly or via FNDECL.  */
1673 if (internal_fn_p (cfn
))
1674 ifn
= as_internal_fn (cfn
);
1676 ifn
= associated_internal_fn (fndecl
);
1677 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1679 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1680 if (info
.vectorizable
)
/* info.type0/type1 < 0 selects the output type, otherwise the input
   type, per the direct-internal-fn convention.  */
1682 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1683 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1684 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1685 OPTIMIZE_FOR_SPEED
))
1693 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1694 gimple_stmt_iterator
*);
/* NOTE(review): excerpt garbled by extraction; the second argument to
   tree_int_cst_compare (a zero constant, per the comment's contract)
   is missing from view.  */
1696 /* STMT is a non-strided load or store, meaning that it accesses
1697 elements with a known constant step. Return -1 if that step
1698 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1701 compare_step_with_zero (gimple
*stmt
)
1703 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1704 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
/* Compare the DR's (possibly inner-loop-adjusted) step against zero.  */
1705 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
/* NOTE(review): excerpt garbled by extraction; declarations of I/NUNITS,
   braces, and the "return NULL_TREE" on failure are missing from view.  */
1709 /* If the target supports a permute mask that reverses the elements in
1710 a vector of type VECTYPE, return that mask, otherwise return null. */
1713 perm_mask_for_reverse (tree vectype
)
1717 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Build the element-reversal selector {n-1, n-2, ..., 0}.  */
1719 auto_vec_perm_indices
sel (nunits
);
1720 for (i
= 0; i
< nunits
; ++i
)
1721 sel
.quick_push (nunits
- 1 - i
);
/* Bail out if the target cannot perform this permutation.  */
1723 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, &sel
))
1725 return vect_gen_perm_mask_checked (vectype
, sel
);
/* NOTE(review): excerpt garbled by extraction; braces, several
   conditions (e.g. the slp/!slp split and parts of the alignment-based
   overrun checks), returns, and some call arguments are missing from
   view.  Comments annotate visible logic only.

   Classify a grouped load/store STMT into a vect_memory_access_type,
   stored through *MEMORY_ACCESS_TYPE.  */
1728 /* A subroutine of get_load_store_type, with a subset of the same
1729 arguments. Handle the case where STMT is part of a grouped load
1732 For stores, the statements in the group are all consecutive
1733 and there is no gap at the end. For loads, the statements in the
1734 group might not be consecutive; there can be gaps between statements
1735 as well as at the end. */
1738 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1739 vec_load_store_type vls_type
,
1740 vect_memory_access_type
*memory_access_type
)
1742 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1743 vec_info
*vinfo
= stmt_info
->vinfo
;
1744 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1745 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
/* Properties of the whole group, read from its first (leader) stmt.  */
1746 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1747 data_reference
*first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1748 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1749 bool single_element_p
= (stmt
== first_stmt
1750 && !GROUP_NEXT_ELEMENT (stmt_info
));
1751 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1752 unsigned nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1754 /* True if the vectorized statements would access beyond the last
1755 statement in the group. */
1756 bool overrun_p
= false;
1758 /* True if we can cope with such overrun by peeling for gaps, so that
1759 there is at least one final scalar iteration after the vector loop. */
1760 bool can_overrun_p
= (vls_type
== VLS_LOAD
&& loop_vinfo
&& !loop
->inner
);
1762 /* There can only be a gap at the end of the group if the stride is
1763 known at compile time. */
1764 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1766 /* Stores can't yet have gaps. */
1767 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
/* SLP branch (the enclosing "if (slp)" line is missing from view).  */
1771 if (STMT_VINFO_STRIDED_P (stmt_info
))
1773 /* Try to use consecutive accesses of GROUP_SIZE elements,
1774 separated by the stride, until we have a complete vector.
1775 Fall back to scalar accesses if that isn't possible. */
1776 if (nunits
% group_size
== 0)
1777 *memory_access_type
= VMAT_STRIDED_SLP
;
1779 *memory_access_type
= VMAT_ELEMENTWISE
;
/* Non-strided SLP: contiguous access; a trailing gap implies overrun.  */
1783 overrun_p
= loop_vinfo
&& gap
!= 0;
1784 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1787 "Grouped store with gaps requires"
1788 " non-consecutive accesses\n");
1791 /* An overrun is fine if the trailing elements are smaller
1792 than the alignment boundary B. Every vector access will
1793 be a multiple of B and so we are guaranteed to access a
1794 non-gap element in the same B-sized block. */
1796 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1797 / vect_get_scalar_dr_size (first_dr
)))
1799 if (overrun_p
&& !can_overrun_p
)
1801 if (dump_enabled_p ())
1802 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1803 "Peeling for outer loop is not supported\n");
1806 *memory_access_type
= VMAT_CONTIGUOUS
;
/* Non-SLP branch.  */
1811 /* We can always handle this case using elementwise accesses,
1812 but see if something more efficient is available. */
1813 *memory_access_type
= VMAT_ELEMENTWISE
;
1815 /* If there is a gap at the end of the group then these optimizations
1816 would access excess elements in the last iteration. */
1817 bool would_overrun_p
= (gap
!= 0);
1818 /* An overrun is fine if the trailing elements are smaller than the
1819 alignment boundary B. Every vector access will be a multiple of B
1820 and so we are guaranteed to access a non-gap element in the
1821 same B-sized block. */
1823 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1824 / vect_get_scalar_dr_size (first_dr
)))
1825 would_overrun_p
= false;
/* Only contiguous forward steps can use lanes/permute strategies.  */
1827 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1828 && (can_overrun_p
|| !would_overrun_p
)
1829 && compare_step_with_zero (stmt
) > 0)
1831 /* First try using LOAD/STORE_LANES. */
1832 if (vls_type
== VLS_LOAD
1833 ? vect_load_lanes_supported (vectype
, group_size
)
1834 : vect_store_lanes_supported (vectype
, group_size
))
1836 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
1837 overrun_p
= would_overrun_p
;
1840 /* If that fails, try using permuting loads. */
1841 if (*memory_access_type
== VMAT_ELEMENTWISE
1842 && (vls_type
== VLS_LOAD
1843 ? vect_grouped_load_supported (vectype
, single_element_p
,
1845 : vect_grouped_store_supported (vectype
, group_size
)))
1847 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
1848 overrun_p
= would_overrun_p
;
/* For a store group leader, every member's stored value must be a
   "simple use" or the group cannot be vectorized.  */
1853 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
1855 /* STMT is the leader of the group. Check the operands of all the
1856 stmts of the group. */
1857 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
1860 gcc_assert (gimple_assign_single_p (next_stmt
));
1861 tree op
= gimple_assign_rhs1 (next_stmt
);
1863 enum vect_def_type dt
;
1864 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
1866 if (dump_enabled_p ())
1867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1868 "use not simple.\n");
1871 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* Committed to an overrunning access: request peeling for gaps.  */
1877 gcc_assert (can_overrun_p
);
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1880 "Data access with gaps requires scalar "
1882 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* NOTE(review): excerpt garbled by extraction; braces and the
   "ncopies > 1" guard line before the first dump are missing from view.
   Visible logic: classify a negative-step access, preferring reversed
   contiguous access when alignment and a reversal permute allow it.  */
1888 /* A subroutine of get_load_store_type, with a subset of the same
1889 arguments. Handle the case where STMT is a load or store that
1890 accesses consecutive elements with a negative step. */
1892 static vect_memory_access_type
1893 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1894 vec_load_store_type vls_type
,
1895 unsigned int ncopies
)
1897 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1898 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1899 dr_alignment_support alignment_support_scheme
;
/* Multiple copies with a negative step: fall back to elementwise
   (the guarding condition line is missing from this excerpt).  */
1903 if (dump_enabled_p ())
1904 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1905 "multiple types with negative step.\n");
1906 return VMAT_ELEMENTWISE
;
/* Reversed access needs aligned or supported-unaligned DR access.  */
1909 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1910 if (alignment_support_scheme
!= dr_aligned
1911 && alignment_support_scheme
!= dr_unaligned_supported
)
1913 if (dump_enabled_p ())
1914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1915 "negative step but alignment required.\n");
1916 return VMAT_ELEMENTWISE
;
/* Storing an invariant: element order is irrelevant, so a plain
   downward contiguous store suffices (no permute).  */
1919 if (vls_type
== VLS_STORE_INVARIANT
)
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_NOTE
, vect_location
,
1923 "negative step with invariant source;"
1924 " no permute needed.\n");
1925 return VMAT_CONTIGUOUS_DOWN
;
/* Otherwise a reversal permute is required; check target support.  */
1928 if (!perm_mask_for_reverse (vectype
))
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1932 "negative step and reversing not supported.\n");
1933 return VMAT_ELEMENTWISE
;
1936 return VMAT_CONTIGUOUS_REVERSE
;
/* NOTE(review): excerpt garbled by extraction; braces, return
   statements, the def_stmt declaration, and some conditions (e.g. the
   cmp == 0 / cmp < 0 dispatch lines) are missing from view.  Comments
   annotate visible logic only.  */
1939 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1940 if there is a memory access type that the vectorized form can use,
1941 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1942 or scatters, fill in GS_INFO accordingly.
1944 SLP says whether we're performing SLP rather than loop vectorization.
1945 VECTYPE is the vector type that the vectorized statements will use.
1946 NCOPIES is the number of vector statements that will be needed. */
1949 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1950 vec_load_store_type vls_type
, unsigned int ncopies
,
1951 vect_memory_access_type
*memory_access_type
,
1952 gather_scatter_info
*gs_info
)
1954 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1955 vec_info
*vinfo
= stmt_info
->vinfo
;
1956 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* Gather/scatter accesses: validate the gather/scatter description and
   the simplicity of its offset operand.  */
1957 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1959 *memory_access_type
= VMAT_GATHER_SCATTER
;
1961 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1963 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1964 &gs_info
->offset_dt
,
1965 &gs_info
->offset_vectype
))
1967 if (dump_enabled_p ())
1968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1969 "%s index use not simple.\n",
1970 vls_type
== VLS_LOAD
? "gather" : "scatter");
/* Grouped accesses get their own classification subroutine.  */
1974 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1976 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1977 memory_access_type
))
/* Strided (non-grouped) accesses are handled elementwise.  */
1980 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1983 *memory_access_type
= VMAT_ELEMENTWISE
;
/* Constant-step access: dispatch on the sign of the step (the cmp == 0
   and cmp < 0 branch lines are missing from this excerpt).  */
1987 int cmp
= compare_step_with_zero (stmt
);
1989 *memory_access_type
= get_negative_load_store_type
1990 (stmt
, vectype
, vls_type
, ncopies
);
/* Zero step: only loads can be invariant.  */
1993 gcc_assert (vls_type
== VLS_LOAD
);
1994 *memory_access_type
= VMAT_INVARIANT
;
1997 *memory_access_type
= VMAT_CONTIGUOUS
;
2000 /* FIXME: At the moment the cost model seems to underestimate the
2001 cost of using elementwise accesses. This check preserves the
2002 traditional behavior until that can be fixed. */
2003 if (*memory_access_type
== VMAT_ELEMENTWISE
2004 && !STMT_VINFO_STRIDED_P (stmt_info
))
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2008 "not falling back to elementwise accesses\n");
2014 /* Function vectorizable_mask_load_store.
2016 Check if STMT performs a conditional load or store that can be vectorized.
2017 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2018 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2019 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2022 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2023 gimple
**vec_stmt
, slp_tree slp_node
)
2025 tree vec_dest
= NULL
;
2026 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2027 stmt_vec_info prev_stmt_info
;
2028 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2029 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2030 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2031 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2032 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2033 tree rhs_vectype
= NULL_TREE
;
2038 tree dataref_ptr
= NULL_TREE
;
2040 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2044 gather_scatter_info gs_info
;
2045 vec_load_store_type vls_type
;
2048 enum vect_def_type dt
;
2050 if (slp_node
!= NULL
)
2053 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2054 gcc_assert (ncopies
>= 1);
2056 mask
= gimple_call_arg (stmt
, 2);
2058 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2061 /* FORNOW. This restriction should be relaxed. */
2062 if (nested_in_vect_loop
&& ncopies
> 1)
2064 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2066 "multiple types in nested loop.");
2070 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2073 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2077 if (!STMT_VINFO_DATA_REF (stmt_info
))
2080 elem_type
= TREE_TYPE (vectype
);
2082 if (TREE_CODE (mask
) != SSA_NAME
)
2085 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2089 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2091 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2092 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2095 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2097 tree rhs
= gimple_call_arg (stmt
, 3);
2098 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2100 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2101 vls_type
= VLS_STORE_INVARIANT
;
2103 vls_type
= VLS_STORE
;
2106 vls_type
= VLS_LOAD
;
2108 vect_memory_access_type memory_access_type
;
2109 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2110 &memory_access_type
, &gs_info
))
2113 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2115 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2117 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2118 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2120 if (dump_enabled_p ())
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2122 "masked gather with integer mask not supported.");
2126 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2128 if (dump_enabled_p ())
2129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2130 "unsupported access type for masked %s.\n",
2131 vls_type
== VLS_LOAD
? "load" : "store");
2134 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2135 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2136 TYPE_MODE (mask_vectype
),
2137 vls_type
== VLS_LOAD
)
2139 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2142 if (!vec_stmt
) /* transformation not required. */
2144 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2145 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2146 if (vls_type
== VLS_LOAD
)
2147 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2150 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2151 dt
, NULL
, NULL
, NULL
);
2154 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2158 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2160 tree vec_oprnd0
= NULL_TREE
, op
;
2161 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2162 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2163 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2164 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2165 tree mask_perm_mask
= NULL_TREE
;
2166 edge pe
= loop_preheader_edge (loop
);
2169 enum { NARROW
, NONE
, WIDEN
} modifier
;
2170 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2172 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2173 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2174 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2175 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2176 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2177 scaletype
= TREE_VALUE (arglist
);
2178 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2179 && types_compatible_p (srctype
, masktype
));
2181 if (nunits
== gather_off_nunits
)
2183 else if (nunits
== gather_off_nunits
/ 2)
2187 auto_vec_perm_indices
sel (gather_off_nunits
);
2188 for (i
= 0; i
< gather_off_nunits
; ++i
)
2189 sel
.quick_push (i
| nunits
);
2191 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2193 else if (nunits
== gather_off_nunits
* 2)
2197 auto_vec_perm_indices
sel (nunits
);
2198 sel
.quick_grow (nunits
);
2199 for (i
= 0; i
< nunits
; ++i
)
2200 sel
[i
] = i
< gather_off_nunits
2201 ? i
: i
+ nunits
- gather_off_nunits
;
2203 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2205 for (i
= 0; i
< nunits
; ++i
)
2206 sel
[i
] = i
| gather_off_nunits
;
2207 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2212 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2214 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2215 if (!is_gimple_min_invariant (ptr
))
2217 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2218 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2219 gcc_assert (!new_bb
);
2222 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2224 prev_stmt_info
= NULL
;
2225 for (j
= 0; j
< ncopies
; ++j
)
2227 if (modifier
== WIDEN
&& (j
& 1))
2228 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2229 perm_mask
, stmt
, gsi
);
2232 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2235 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2237 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2239 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2240 == TYPE_VECTOR_SUBPARTS (idxtype
));
2241 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2242 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2244 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2245 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2249 if (mask_perm_mask
&& (j
& 1))
2250 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2251 mask_perm_mask
, stmt
, gsi
);
2255 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2258 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2259 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2263 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2265 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2266 == TYPE_VECTOR_SUBPARTS (masktype
));
2267 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2268 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2270 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2271 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2277 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2280 if (!useless_type_conversion_p (vectype
, rettype
))
2282 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2283 == TYPE_VECTOR_SUBPARTS (rettype
));
2284 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2285 gimple_call_set_lhs (new_stmt
, op
);
2286 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2287 var
= make_ssa_name (vec_dest
);
2288 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2289 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2293 var
= make_ssa_name (vec_dest
, new_stmt
);
2294 gimple_call_set_lhs (new_stmt
, var
);
2297 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2299 if (modifier
== NARROW
)
2306 var
= permute_vec_elements (prev_res
, var
,
2307 perm_mask
, stmt
, gsi
);
2308 new_stmt
= SSA_NAME_DEF_STMT (var
);
2311 if (prev_stmt_info
== NULL
)
2312 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2314 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2315 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2318 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2320 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2322 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2323 stmt_info
= vinfo_for_stmt (stmt
);
2325 tree lhs
= gimple_call_lhs (stmt
);
2326 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2327 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2328 set_vinfo_for_stmt (stmt
, NULL
);
2329 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2330 gsi_replace (gsi
, new_stmt
, true);
2333 else if (vls_type
!= VLS_LOAD
)
2335 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2336 prev_stmt_info
= NULL
;
2337 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2338 for (i
= 0; i
< ncopies
; i
++)
2340 unsigned align
, misalign
;
2344 tree rhs
= gimple_call_arg (stmt
, 3);
2345 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2346 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2348 /* We should have caught mismatched types earlier. */
2349 gcc_assert (useless_type_conversion_p (vectype
,
2350 TREE_TYPE (vec_rhs
)));
2351 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2352 NULL_TREE
, &dummy
, gsi
,
2353 &ptr_incr
, false, &inv_p
);
2354 gcc_assert (!inv_p
);
2358 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2359 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2360 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2361 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2362 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2363 TYPE_SIZE_UNIT (vectype
));
2366 align
= DR_TARGET_ALIGNMENT (dr
);
2367 if (aligned_access_p (dr
))
2369 else if (DR_MISALIGNMENT (dr
) == -1)
2371 align
= TYPE_ALIGN_UNIT (elem_type
);
2375 misalign
= DR_MISALIGNMENT (dr
);
2376 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2378 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2379 misalign
? least_bit_hwi (misalign
) : align
);
2381 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2382 ptr
, vec_mask
, vec_rhs
);
2383 gimple_call_set_nothrow (call
, true);
2385 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2387 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2389 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2390 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2395 tree vec_mask
= NULL_TREE
;
2396 prev_stmt_info
= NULL
;
2397 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2398 for (i
= 0; i
< ncopies
; i
++)
2400 unsigned align
, misalign
;
2404 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2406 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2407 NULL_TREE
, &dummy
, gsi
,
2408 &ptr_incr
, false, &inv_p
);
2409 gcc_assert (!inv_p
);
2413 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2414 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2415 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2416 TYPE_SIZE_UNIT (vectype
));
2419 align
= DR_TARGET_ALIGNMENT (dr
);
2420 if (aligned_access_p (dr
))
2422 else if (DR_MISALIGNMENT (dr
) == -1)
2424 align
= TYPE_ALIGN_UNIT (elem_type
);
2428 misalign
= DR_MISALIGNMENT (dr
);
2429 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2431 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2432 misalign
? least_bit_hwi (misalign
) : align
);
2434 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2436 gimple_call_set_lhs (call
, make_ssa_name (vec_dest
));
2437 gimple_call_set_nothrow (call
, true);
2438 vect_finish_stmt_generation (stmt
, call
, gsi
);
2440 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= call
;
2442 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = call
;
2443 prev_stmt_info
= vinfo_for_stmt (call
);
2447 if (vls_type
== VLS_LOAD
)
2449 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2451 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2453 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2454 stmt_info
= vinfo_for_stmt (stmt
);
2456 tree lhs
= gimple_call_lhs (stmt
);
2457 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2458 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2459 set_vinfo_for_stmt (stmt
, NULL
);
2460 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2461 gsi_replace (gsi
, new_stmt
, true);
/* NOTE(review): this region is extraction residue of GCC's vectorizable_bswap
   (tree-vect-stmts.c).  Logical lines are split across physical lines and the
   embedded original line numbers show gaps, i.e. statements (returns, braces)
   are missing.  Comments below are hedged reconstruction notes only; the code
   bytes are preserved unchanged and must be restored from the upstream file
   before any behavioral claim is trusted.  */
2467 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
/* Presumably returns bool: true if the bswap call can be (or was)
   vectorized as a byte permutation — TODO confirm against upstream.  */
2470 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2471 gimple
**vec_stmt
, slp_tree slp_node
,
2472 tree vectype_in
, enum vect_def_type
*dt
)
2475 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2476 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2477 unsigned ncopies
, nunits
;
2479 op
= gimple_call_arg (stmt
, 0);
2480 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2481 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2483 /* Multiple types in SLP are handled by creating the appropriate number of
2484 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2489 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2491 gcc_assert (ncopies
>= 1);
2493 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2497 unsigned int num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2498 unsigned word_bytes
= num_bytes
/ nunits
;
/* Builds a byte-reversal permutation per word: bytes of each element are
   pushed in reverse order — see the (i + 1) * word_bytes - j - 1 index.  */
2500 auto_vec_perm_indices
elts (num_bytes
);
2501 for (unsigned i
= 0; i
< nunits
; ++i
)
2502 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2503 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2505 if (! can_vec_perm_p (TYPE_MODE (char_vectype
), false, &elts
))
/* Analysis-only path: record stmt type and costs (missing the
   !vec_stmt guard line in this residue — TODO confirm).  */
2510 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2511 if (dump_enabled_p ())
2512 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2514 if (! PURE_SLP_STMT (stmt_info
))
2516 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2517 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2518 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2519 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
/* Transform: materialize the permutation selector as a vector constant.  */
2524 tree_vector_builder
telts (char_vectype
, num_bytes
, 1);
2525 for (unsigned i
= 0; i
< num_bytes
; ++i
)
2526 telts
.quick_push (build_int_cst (char_type_node
, elts
[i
]));
2527 tree bswap_vconst
= telts
.build ();
2530 vec
<tree
> vec_oprnds
= vNULL
;
2531 gimple
*new_stmt
= NULL
;
2532 stmt_vec_info prev_stmt_info
= NULL
;
2533 for (unsigned j
= 0; j
< ncopies
; j
++)
2537 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
)
;
2539 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2541 /* Arguments are ready. Create the new vector stmt. */
/* Per operand: view-convert to the char vector type, permute bytes,
   then view-convert back to the original vector type.  */
2544 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2546 tree tem
= make_ssa_name (char_vectype
);
2547 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2548 char_vectype
, vop
));
2549 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2550 tree tem2
= make_ssa_name (char_vectype
);
2551 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2552 tem
, tem
, bswap_vconst
);
2553 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2554 tem
= make_ssa_name (vectype
);
2555 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2557 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2559 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the generated copies through STMT_VINFO_RELATED_STMT.  */
2566 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2568 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2570 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2573 vec_oprnds
.release ();
2577 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2578 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2579 in a single step. On success, store the binary pack code in
2583 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2584 tree_code
*convert_code
)
2586 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2587 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2591 int multi_step_cvt
= 0;
2592 auto_vec
<tree
, 8> interm_types
;
2593 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2594 &code
, &multi_step_cvt
,
2599 *convert_code
= code
;
/* NOTE(review): extraction residue of GCC's vectorizable_call
   (tree-vect-stmts.c).  Logical lines are split across physical lines and
   many original lines (braces, returns, declarations such as `gcall *stmt;`,
   `tree scalar_dest;`) are missing — see the gaps in the embedded original
   line numbers.  Code bytes are preserved unchanged; comments are hedged
   orientation notes only.  Restore from upstream before relying on any of
   this.  */
2603 /* Function vectorizable_call.
2605 Check if GS performs a function call that can be vectorized.
2606 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2607 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2608 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2611 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2618 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2619 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2620 tree vectype_out
, vectype_in
;
2623 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2624 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2625 vec_info
*vinfo
= stmt_info
->vinfo
;
2626 tree fndecl
, new_temp
, rhs_type
;
2628 enum vect_def_type dt
[3]
2629 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2631 gimple
*new_stmt
= NULL
;
2633 vec
<tree
> vargs
= vNULL
;
2634 enum { NARROW
, NONE
, WIDEN
} modifier
;
/* Early-out guards (relevance / def-type); the `return false` lines were
   dropped by the extraction — TODO confirm.  */
2638 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2641 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2645 /* Is GS a vectorizable call? */
2646 stmt
= dyn_cast
<gcall
*> (gs
);
/* Masked loads/stores are dispatched to the dedicated routine.  */
2650 if (gimple_call_internal_p (stmt
)
2651 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2652 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2653 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2656 if (gimple_call_lhs (stmt
) == NULL_TREE
2657 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2660 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2662 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2664 /* Process function arguments. */
2665 rhs_type
= NULL_TREE
;
2666 vectype_in
= NULL_TREE
;
2667 nargs
= gimple_call_num_args (stmt
);
2669 /* Bail out if the function has more than three arguments, we do not have
2670 interesting builtin functions to vectorize with more than two arguments
2671 except for fma. No arguments is also not good. */
2672 if (nargs
== 0 || nargs
> 3)
2675 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2676 if (gimple_call_internal_p (stmt
)
2677 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2680 rhs_type
= unsigned_type_node
;
/* Argument loop: all arguments must share a scalar type and a vectype.  */
2683 for (i
= 0; i
< nargs
; i
++)
2687 op
= gimple_call_arg (stmt
, i
);
2689 /* We can only handle calls with arguments of the same type. */
2691 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2693 if (dump_enabled_p ())
2694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2695 "argument types differ.\n");
2699 rhs_type
= TREE_TYPE (op
);
2701 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2703 if (dump_enabled_p ())
2704 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2705 "use not simple.\n");
2710 vectype_in
= opvectype
;
2712 && opvectype
!= vectype_in
)
2714 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2716 "argument vector types differ.\n");
2720 /* If all arguments are external or constant defs use a vector type with
2721 the same size as the output vector type. */
2723 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2725 gcc_assert (vectype_in
);
2728 if (dump_enabled_p ())
2730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2731 "no vectype for scalar type ");
2732 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2733 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify WIDEN / NONE / NARROW from the in/out subpart ratio; the
   `modifier = ...` assignment lines were dropped — TODO confirm.  */
2740 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2741 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2742 if (nunits_in
== nunits_out
/ 2)
2744 else if (nunits_out
== nunits_in
)
2746 else if (nunits_out
== nunits_in
/ 2)
2751 /* We only handle functions that do not read or clobber memory. */
2752 if (gimple_vuse (stmt
))
2754 if (dump_enabled_p ())
2755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2756 "function reads from or writes to memory.\n");
2760 /* For now, we only vectorize functions if a target specific builtin
2761 is available. TODO -- in some cases, it might be profitable to
2762 insert the calls for pieces of the vector, in order to be able
2763 to vectorize other operations in the loop. */
2765 internal_fn ifn
= IFN_LAST
;
2766 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2767 tree callee
= gimple_call_fndecl (stmt
);
2769 /* First try using an internal function. */
2770 tree_code convert_code
= ERROR_MARK
;
2772 && (modifier
== NONE
2773 || (modifier
== NARROW
2774 && simple_integer_narrowing (vectype_out
, vectype_in
,
2776 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2779 /* If that fails, try asking for a target-specific built-in function. */
2780 if (ifn
== IFN_LAST
)
2782 if (cfn
!= CFN_LAST
)
2783 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2784 (cfn
, vectype_out
, vectype_in
);
2786 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2787 (callee
, vectype_out
, vectype_in
);
2790 if (ifn
== IFN_LAST
&& !fndecl
)
2792 if (cfn
== CFN_GOMP_SIMD_LANE
2795 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2796 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2797 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2798 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2800 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2801 { 0, 1, 2, ... vf - 1 } vector. */
2802 gcc_assert (nargs
== 0);
2804 else if (modifier
== NONE
2805 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2806 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2807 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2808 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2812 if (dump_enabled_p ())
2813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2814 "function is not vectorizable.\n");
/* NCOPIES: for NARROW with no internal fn, base it on the output type,
   otherwise on the input type.  */
2821 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2822 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
2824 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
2826 /* Sanity check: make sure that at least one copy of the vectorized stmt
2827 needs to be generated. */
2828 gcc_assert (ncopies
>= 1);
2830 if (!vec_stmt
) /* transformation not required. */
2832 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2833 if (dump_enabled_p ())
2834 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2836 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2837 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2838 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2839 vec_promote_demote
, stmt_info
, 0, vect_body
);
/* Transformation phase starts here.  */
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2850 scalar_dest
= gimple_call_lhs (stmt
);
2851 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2853 prev_stmt_info
= NULL
;
2854 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2856 tree prev_res
= NULL_TREE
;
2857 for (j
= 0; j
< ncopies
; ++j
)
2859 /* Build argument list for the vectorized call. */
2861 vargs
.create (nargs
);
/* SLP path: fetch all defs per argument, then emit one call per
   vectorized operand.  */
2867 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2868 vec
<tree
> vec_oprnds0
;
2870 for (i
= 0; i
< nargs
; i
++)
2871 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2872 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
2873 vec_oprnds0
= vec_defs
[0];
2875 /* Arguments are ready. Create the new vector stmt. */
2876 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2879 for (k
= 0; k
< nargs
; k
++)
2881 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2882 vargs
[k
] = vec_oprndsk
[i
];
2884 if (modifier
== NARROW
)
2886 tree half_res
= make_ssa_name (vectype_in
);
2888 = gimple_build_call_internal_vec (ifn
, vargs
);
2889 gimple_call_set_lhs (call
, half_res
);
2890 gimple_call_set_nothrow (call
, true);
2892 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2895 prev_res
= half_res
;
/* Combine two half-width results with the narrowing convert.  */
2898 new_temp
= make_ssa_name (vec_dest
);
2899 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2900 prev_res
, half_res
);
2905 if (ifn
!= IFN_LAST
)
2906 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2908 call
= gimple_build_call_vec (fndecl
, vargs
);
2909 new_temp
= make_ssa_name (vec_dest
, call
);
2910 gimple_call_set_lhs (call
, new_temp
);
2911 gimple_call_set_nothrow (call
, true);
2914 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2915 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2918 for (i
= 0; i
< nargs
; i
++)
2920 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2921 vec_oprndsi
.release ();
/* Non-SLP path: per-copy operand defs.  */
2926 for (i
= 0; i
< nargs
; i
++)
2928 op
= gimple_call_arg (stmt
, i
);
2931 = vect_get_vec_def_for_operand (op
, stmt
);
2934 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2936 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2939 vargs
.quick_push (vec_oprnd0
);
2942 if (gimple_call_internal_p (stmt
)
2943 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
/* GOMP_SIMD_LANE: emit a { j*nunits_out, ..., j*nunits_out+k }
   stepped vector constant instead of a call.  */
2945 tree_vector_builder
v (vectype_out
, 1, 3);
2946 for (int k
= 0; k
< 3; ++k
)
2947 v
.quick_push (build_int_cst (unsigned_type_node
,
2948 j
* nunits_out
+ k
));
2949 tree cst
= v
.build ();
2951 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2952 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2953 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2954 new_temp
= make_ssa_name (vec_dest
);
2955 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2957 else if (modifier
== NARROW
)
2959 tree half_res
= make_ssa_name (vectype_in
);
2960 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
2961 gimple_call_set_lhs (call
, half_res
);
2962 gimple_call_set_nothrow (call
, true);
2964 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2967 prev_res
= half_res
;
2970 new_temp
= make_ssa_name (vec_dest
);
2971 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2972 prev_res
, half_res
);
2977 if (ifn
!= IFN_LAST
)
2978 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2980 call
= gimple_build_call_vec (fndecl
, vargs
);
2981 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2982 gimple_call_set_lhs (call
, new_temp
);
2983 gimple_call_set_nothrow (call
, true);
2986 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* For NARROW every second copy completes a result, hence j == 1.  */
2988 if (j
== (modifier
== NARROW
? 1 : 0))
2989 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2991 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2993 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* NARROW with a target builtin (no internal fn): each call consumes
   two input vectors.  */
2996 else if (modifier
== NARROW
)
2998 for (j
= 0; j
< ncopies
; ++j
)
3000 /* Build argument list for the vectorized call. */
3002 vargs
.create (nargs
* 2);
3008 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3009 vec
<tree
> vec_oprnds0
;
3011 for (i
= 0; i
< nargs
; i
++)
3012 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3013 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3014 vec_oprnds0
= vec_defs
[0];
3016 /* Arguments are ready. Create the new vector stmt. */
3017 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3021 for (k
= 0; k
< nargs
; k
++)
3023 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3024 vargs
.quick_push (vec_oprndsk
[i
]);
3025 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3028 if (ifn
!= IFN_LAST
)
3029 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3031 call
= gimple_build_call_vec (fndecl
, vargs
);
3032 new_temp
= make_ssa_name (vec_dest
, call
);
3033 gimple_call_set_lhs (call
, new_temp
);
3034 gimple_call_set_nothrow (call
, true);
3036 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3037 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3040 for (i
= 0; i
< nargs
; i
++)
3042 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3043 vec_oprndsi
.release ();
3048 for (i
= 0; i
< nargs
; i
++)
3050 op
= gimple_call_arg (stmt
, i
);
3054 = vect_get_vec_def_for_operand (op
, stmt
);
3056 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3060 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3062 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3064 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3067 vargs
.quick_push (vec_oprnd0
);
3068 vargs
.quick_push (vec_oprnd1
);
3071 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3072 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3073 gimple_call_set_lhs (new_stmt
, new_temp
);
3074 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3077 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3079 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3081 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3084 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3087 /* No current target implements this case. */
3092 /* The call in STMT might prevent it from being removed in dce.
3093 We however cannot remove it here, due to the way the ssa name
3094 it defines is mapped to the new definition. So just replace
3095 rhs of the statement with something harmless. */
3100 type
= TREE_TYPE (scalar_dest
);
3101 if (is_pattern_stmt_p (stmt_info
))
3102 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3104 lhs
= gimple_call_lhs (stmt
);
3106 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3107 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3108 set_vinfo_for_stmt (stmt
, NULL
);
3109 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3110 gsi_replace (gsi
, new_stmt
, false);
/* NOTE(review): extraction residue — only three fields of this struct
   survive; upstream simd_call_arg_info also carries the operand tree, its
   vectype and an alignment field (dropped here).  Restore from upstream.  */
3116 struct simd_call_arg_info
/* Step for linear arguments; 0 when the argument is not linear.  */
3120 HOST_WIDE_INT linear_step
;
/* Vectorizer def-kind classification of the argument.  */
3121 enum vect_def_type dt
;
/* True if the argument is linear only within a simd lane (set by
   vect_simd_lane_linear below).  */
3123 bool simd_lane_linear
;
/* NOTE(review): extraction residue of GCC's vect_simd_lane_linear
   (tree-vect-stmts.c).  switch-case labels, `return` statements and braces
   were dropped by the extraction (gaps in the embedded line numbers).  Code
   bytes preserved unchanged; comments are hedged notes only.  */
3126 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3127 is linear within simd lane (but not within whole loop), note it in
3131 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3132 struct simd_call_arg_info
*arginfo
)
3134 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only handle OP defined as invariant-base POINTER_PLUS_EXPR.  */
3136 if (!is_gimple_assign (def_stmt
)
3137 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3138 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3141 tree base
= gimple_assign_rhs1 (def_stmt
);
3142 HOST_WIDE_INT linear_step
= 0;
3143 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the offset's SSA def chain, accumulating a constant base offset
   and a single constant multiplicative step.  The original case labels
   (presumably PLUS/POINTER_PLUS, MULT, and a conversion) are missing
   from this residue — TODO confirm against upstream.  */
3144 while (TREE_CODE (v
) == SSA_NAME
)
3147 def_stmt
= SSA_NAME_DEF_STMT (v
);
3148 if (is_gimple_assign (def_stmt
))
3149 switch (gimple_assign_rhs_code (def_stmt
))
3152 t
= gimple_assign_rhs2 (def_stmt
);
3153 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3155 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3156 v
= gimple_assign_rhs1 (def_stmt
);
3159 t
= gimple_assign_rhs2 (def_stmt
);
3160 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3162 linear_step
= tree_to_shwi (t
);
3163 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversion case: only widening (or equal-precision) integer
   conversions are looked through.  */
3166 t
= gimple_assign_rhs1 (def_stmt
);
3167 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3168 || (TYPE_PRECISION (TREE_TYPE (v
))
3169 < TYPE_PRECISION (TREE_TYPE (t
))))
/* Chain must terminate at the loop's GOMP_SIMD_LANE call for OP to be
   simd-lane linear.  */
3178 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3180 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3181 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3186 arginfo
->linear_step
= linear_step
;
3188 arginfo
->simd_lane_linear
= true;
3194 /* Function vectorizable_simd_clone_call.
3196 Check if STMT performs a function call that can be vectorized
3197 by calling a simd clone of the function.
3198 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3199 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3203 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3204 gimple
**vec_stmt
, slp_tree slp_node
)
3209 tree vec_oprnd0
= NULL_TREE
;
3210 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3212 unsigned int nunits
;
3213 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3214 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3215 vec_info
*vinfo
= stmt_info
->vinfo
;
3216 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3217 tree fndecl
, new_temp
;
3219 gimple
*new_stmt
= NULL
;
3221 auto_vec
<simd_call_arg_info
> arginfo
;
3222 vec
<tree
> vargs
= vNULL
;
3224 tree lhs
, rtype
, ratype
;
3225 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3227 /* Is STMT a vectorizable call? */
3228 if (!is_gimple_call (stmt
))
3231 fndecl
= gimple_call_fndecl (stmt
);
3232 if (fndecl
== NULL_TREE
)
3235 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3236 if (node
== NULL
|| node
->simd_clones
== NULL
)
3239 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3242 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3246 if (gimple_call_lhs (stmt
)
3247 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3250 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3252 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3254 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3261 /* Process function arguments. */
3262 nargs
= gimple_call_num_args (stmt
);
3264 /* Bail out if the function has zero arguments. */
3268 arginfo
.reserve (nargs
, true);
3270 for (i
= 0; i
< nargs
; i
++)
3272 simd_call_arg_info thisarginfo
;
3275 thisarginfo
.linear_step
= 0;
3276 thisarginfo
.align
= 0;
3277 thisarginfo
.op
= NULL_TREE
;
3278 thisarginfo
.simd_lane_linear
= false;
3280 op
= gimple_call_arg (stmt
, i
);
3281 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3282 &thisarginfo
.vectype
)
3283 || thisarginfo
.dt
== vect_uninitialized_def
)
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "use not simple.\n");
3291 if (thisarginfo
.dt
== vect_constant_def
3292 || thisarginfo
.dt
== vect_external_def
)
3293 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3295 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3297 /* For linear arguments, the analyze phase should have saved
3298 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3299 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3300 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3302 gcc_assert (vec_stmt
);
3303 thisarginfo
.linear_step
3304 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3306 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3307 thisarginfo
.simd_lane_linear
3308 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3309 == boolean_true_node
);
3310 /* If loop has been peeled for alignment, we need to adjust it. */
3311 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3312 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3313 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3315 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3316 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3317 tree opt
= TREE_TYPE (thisarginfo
.op
);
3318 bias
= fold_convert (TREE_TYPE (step
), bias
);
3319 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3321 = fold_build2 (POINTER_TYPE_P (opt
)
3322 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3323 thisarginfo
.op
, bias
);
3327 && thisarginfo
.dt
!= vect_constant_def
3328 && thisarginfo
.dt
!= vect_external_def
3330 && TREE_CODE (op
) == SSA_NAME
3331 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3333 && tree_fits_shwi_p (iv
.step
))
3335 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3336 thisarginfo
.op
= iv
.base
;
3338 else if ((thisarginfo
.dt
== vect_constant_def
3339 || thisarginfo
.dt
== vect_external_def
)
3340 && POINTER_TYPE_P (TREE_TYPE (op
)))
3341 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3342 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3344 if (POINTER_TYPE_P (TREE_TYPE (op
))
3345 && !thisarginfo
.linear_step
3347 && thisarginfo
.dt
!= vect_constant_def
3348 && thisarginfo
.dt
!= vect_external_def
3351 && TREE_CODE (op
) == SSA_NAME
)
3352 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3354 arginfo
.quick_push (thisarginfo
);
3357 unsigned int badness
= 0;
3358 struct cgraph_node
*bestn
= NULL
;
3359 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3360 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3362 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3363 n
= n
->simdclone
->next_clone
)
3365 unsigned int this_badness
= 0;
3366 if (n
->simdclone
->simdlen
3367 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3368 || n
->simdclone
->nargs
!= nargs
)
3370 if (n
->simdclone
->simdlen
3371 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3372 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3373 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3374 if (n
->simdclone
->inbranch
)
3375 this_badness
+= 2048;
3376 int target_badness
= targetm
.simd_clone
.usable (n
);
3377 if (target_badness
< 0)
3379 this_badness
+= target_badness
* 512;
3380 /* FORNOW: Have to add code to add the mask argument. */
3381 if (n
->simdclone
->inbranch
)
3383 for (i
= 0; i
< nargs
; i
++)
3385 switch (n
->simdclone
->args
[i
].arg_type
)
3387 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3388 if (!useless_type_conversion_p
3389 (n
->simdclone
->args
[i
].orig_type
,
3390 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3392 else if (arginfo
[i
].dt
== vect_constant_def
3393 || arginfo
[i
].dt
== vect_external_def
3394 || arginfo
[i
].linear_step
)
3397 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3398 if (arginfo
[i
].dt
!= vect_constant_def
3399 && arginfo
[i
].dt
!= vect_external_def
)
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3403 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3404 if (arginfo
[i
].dt
== vect_constant_def
3405 || arginfo
[i
].dt
== vect_external_def
3406 || (arginfo
[i
].linear_step
3407 != n
->simdclone
->args
[i
].linear_step
))
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3415 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3419 case SIMD_CLONE_ARG_TYPE_MASK
:
3422 if (i
== (size_t) -1)
3424 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3429 if (arginfo
[i
].align
)
3430 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3431 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3433 if (i
== (size_t) -1)
3435 if (bestn
== NULL
|| this_badness
< badness
)
3438 badness
= this_badness
;
3445 for (i
= 0; i
< nargs
; i
++)
3446 if ((arginfo
[i
].dt
== vect_constant_def
3447 || arginfo
[i
].dt
== vect_external_def
)
3448 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3451 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3453 if (arginfo
[i
].vectype
== NULL
3454 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3455 > bestn
->simdclone
->simdlen
))
3459 fndecl
= bestn
->decl
;
3460 nunits
= bestn
->simdclone
->simdlen
;
3461 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3463 /* If the function isn't const, only allow it in simd loops where user
3464 has asserted that at least nunits consecutive iterations can be
3465 performed using SIMD instructions. */
3466 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3467 && gimple_vuse (stmt
))
3470 /* Sanity check: make sure that at least one copy of the vectorized stmt
3471 needs to be generated. */
3472 gcc_assert (ncopies
>= 1);
3474 if (!vec_stmt
) /* transformation not required. */
3476 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3477 for (i
= 0; i
< nargs
; i
++)
3478 if ((bestn
->simdclone
->args
[i
].arg_type
3479 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3480 || (bestn
->simdclone
->args
[i
].arg_type
3481 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3485 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3486 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3487 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3488 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3489 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3490 tree sll
= arginfo
[i
].simd_lane_linear
3491 ? boolean_true_node
: boolean_false_node
;
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3494 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3495 if (dump_enabled_p ())
3496 dump_printf_loc (MSG_NOTE
, vect_location
,
3497 "=== vectorizable_simd_clone_call ===\n");
3498 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3504 if (dump_enabled_p ())
3505 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3508 scalar_dest
= gimple_call_lhs (stmt
);
3509 vec_dest
= NULL_TREE
;
3514 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3515 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3516 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3519 rtype
= TREE_TYPE (ratype
);
3523 prev_stmt_info
= NULL
;
3524 for (j
= 0; j
< ncopies
; ++j
)
3526 /* Build argument list for the vectorized call. */
3528 vargs
.create (nargs
);
3532 for (i
= 0; i
< nargs
; i
++)
3534 unsigned int k
, l
, m
, o
;
3536 op
= gimple_call_arg (stmt
, i
);
3537 switch (bestn
->simdclone
->args
[i
].arg_type
)
3539 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3540 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3541 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3542 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3544 if (TYPE_VECTOR_SUBPARTS (atype
)
3545 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3547 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3548 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3549 / TYPE_VECTOR_SUBPARTS (atype
));
3550 gcc_assert ((k
& (k
- 1)) == 0);
3553 = vect_get_vec_def_for_operand (op
, stmt
);
3556 vec_oprnd0
= arginfo
[i
].op
;
3557 if ((m
& (k
- 1)) == 0)
3559 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3562 arginfo
[i
].op
= vec_oprnd0
;
3564 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3566 bitsize_int ((m
& (k
- 1)) * prec
));
3568 = gimple_build_assign (make_ssa_name (atype
),
3570 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3571 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3575 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3576 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3577 gcc_assert ((k
& (k
- 1)) == 0);
3578 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3580 vec_alloc (ctor_elts
, k
);
3583 for (l
= 0; l
< k
; l
++)
3585 if (m
== 0 && l
== 0)
3587 = vect_get_vec_def_for_operand (op
, stmt
);
3590 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3592 arginfo
[i
].op
= vec_oprnd0
;
3595 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3599 vargs
.safe_push (vec_oprnd0
);
3602 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3604 = gimple_build_assign (make_ssa_name (atype
),
3606 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3607 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3612 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3613 vargs
.safe_push (op
);
3615 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3616 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3621 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3626 edge pe
= loop_preheader_edge (loop
);
3627 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3628 gcc_assert (!new_bb
);
3630 if (arginfo
[i
].simd_lane_linear
)
3632 vargs
.safe_push (arginfo
[i
].op
);
3635 tree phi_res
= copy_ssa_name (op
);
3636 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3637 set_vinfo_for_stmt (new_phi
,
3638 new_stmt_vec_info (new_phi
, loop_vinfo
));
3639 add_phi_arg (new_phi
, arginfo
[i
].op
,
3640 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3642 = POINTER_TYPE_P (TREE_TYPE (op
))
3643 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3644 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3645 ? sizetype
: TREE_TYPE (op
);
3647 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3649 tree tcst
= wide_int_to_tree (type
, cst
);
3650 tree phi_arg
= copy_ssa_name (op
);
3652 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3653 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3654 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3655 set_vinfo_for_stmt (new_stmt
,
3656 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3657 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3659 arginfo
[i
].op
= phi_res
;
3660 vargs
.safe_push (phi_res
);
3665 = POINTER_TYPE_P (TREE_TYPE (op
))
3666 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3667 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3668 ? sizetype
: TREE_TYPE (op
);
3670 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3672 tree tcst
= wide_int_to_tree (type
, cst
);
3673 new_temp
= make_ssa_name (TREE_TYPE (op
));
3674 new_stmt
= gimple_build_assign (new_temp
, code
,
3675 arginfo
[i
].op
, tcst
);
3676 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3677 vargs
.safe_push (new_temp
);
3680 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3681 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3682 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3683 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3684 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3685 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3691 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3694 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3696 new_temp
= create_tmp_var (ratype
);
3697 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3698 == TYPE_VECTOR_SUBPARTS (rtype
))
3699 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3701 new_temp
= make_ssa_name (rtype
, new_stmt
);
3702 gimple_call_set_lhs (new_stmt
, new_temp
);
3704 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3708 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3711 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3712 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3713 gcc_assert ((k
& (k
- 1)) == 0);
3714 for (l
= 0; l
< k
; l
++)
3719 t
= build_fold_addr_expr (new_temp
);
3720 t
= build2 (MEM_REF
, vectype
, t
,
3721 build_int_cst (TREE_TYPE (t
),
3722 l
* prec
/ BITS_PER_UNIT
));
3725 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3726 bitsize_int (prec
), bitsize_int (l
* prec
));
3728 = gimple_build_assign (make_ssa_name (vectype
), t
);
3729 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3730 if (j
== 0 && l
== 0)
3731 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3733 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3735 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3740 tree clobber
= build_constructor (ratype
, NULL
);
3741 TREE_THIS_VOLATILE (clobber
) = 1;
3742 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3743 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3747 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3749 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3750 / TYPE_VECTOR_SUBPARTS (rtype
));
3751 gcc_assert ((k
& (k
- 1)) == 0);
3752 if ((j
& (k
- 1)) == 0)
3753 vec_alloc (ret_ctor_elts
, k
);
3756 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3757 for (m
= 0; m
< o
; m
++)
3759 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3760 size_int (m
), NULL_TREE
, NULL_TREE
);
3762 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3763 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3764 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3765 gimple_assign_lhs (new_stmt
));
3767 tree clobber
= build_constructor (ratype
, NULL
);
3768 TREE_THIS_VOLATILE (clobber
) = 1;
3769 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3770 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3773 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3774 if ((j
& (k
- 1)) != k
- 1)
3776 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3778 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3779 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3781 if ((unsigned) j
== k
- 1)
3782 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3784 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3786 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3791 tree t
= build_fold_addr_expr (new_temp
);
3792 t
= build2 (MEM_REF
, vectype
, t
,
3793 build_int_cst (TREE_TYPE (t
), 0));
3795 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3796 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3797 tree clobber
= build_constructor (ratype
, NULL
);
3798 TREE_THIS_VOLATILE (clobber
) = 1;
3799 vect_finish_stmt_generation (stmt
,
3800 gimple_build_assign (new_temp
,
3806 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3808 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3810 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3815 /* The call in STMT might prevent it from being removed in dce.
3816 We however cannot remove it here, due to the way the ssa name
3817 it defines is mapped to the new definition. So just replace
3818 rhs of the statement with something harmless. */
3825 type
= TREE_TYPE (scalar_dest
);
3826 if (is_pattern_stmt_p (stmt_info
))
3827 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3829 lhs
= gimple_call_lhs (stmt
);
3830 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3833 new_stmt
= gimple_build_nop ();
3834 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3835 set_vinfo_for_stmt (stmt
, NULL
);
3836 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3837 gsi_replace (gsi
, new_stmt
, true);
3838 unlink_stmt_vdef (stmt
);
3844 /* Function vect_gen_widened_results_half
3846 Create a vector stmt whose code, type, number of arguments, and result
3847 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3848 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3849 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3850 needs to be created (DECL is a function-decl of a target-builtin).
3851 STMT is the original scalar stmt that we are vectorizing. */
3854 vect_gen_widened_results_half (enum tree_code code
,
3856 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3857 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3863 /* Generate half of the widened result: */
3864 if (code
== CALL_EXPR
)
3866 /* Target specific support */
3867 if (op_type
== binary_op
)
3868 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3870 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3871 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3872 gimple_call_set_lhs (new_stmt
, new_temp
);
3876 /* Generic support */
3877 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3878 if (op_type
!= binary_op
)
3880 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3881 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3882 gimple_assign_set_lhs (new_stmt
, new_temp
);
3884 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3890 /* Get vectorized definitions for loop-based vectorization. For the first
3891 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3892 scalar operand), and for the rest we get a copy with
3893 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3894 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3895 The vectors are collected into VEC_OPRNDS. */
3898 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3899 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3903 /* Get first vector operand. */
3904 /* All the vector operands except the very first one (that is scalar oprnd)
3906 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3907 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3909 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3911 vec_oprnds
->quick_push (vec_oprnd
);
3913 /* Get second vector operand. */
3914 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3915 vec_oprnds
->quick_push (vec_oprnd
);
3919 /* For conversion in multiple steps, continue to get operands
3922 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3926 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3927 For multi-step conversions store the resulting vectors and call the function
3931 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3932 int multi_step_cvt
, gimple
*stmt
,
3934 gimple_stmt_iterator
*gsi
,
3935 slp_tree slp_node
, enum tree_code code
,
3936 stmt_vec_info
*prev_stmt_info
)
3939 tree vop0
, vop1
, new_tmp
, vec_dest
;
3941 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3943 vec_dest
= vec_dsts
.pop ();
3945 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3947 /* Create demotion operation. */
3948 vop0
= (*vec_oprnds
)[i
];
3949 vop1
= (*vec_oprnds
)[i
+ 1];
3950 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3951 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3952 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3953 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3956 /* Store the resulting vector for next recursive call. */
3957 (*vec_oprnds
)[i
/2] = new_tmp
;
3960 /* This is the last step of the conversion sequence. Store the
3961 vectors in SLP_NODE or in vector info of the scalar statement
3962 (or in STMT_VINFO_RELATED_STMT chain). */
3964 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3967 if (!*prev_stmt_info
)
3968 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3970 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3972 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3977 /* For multi-step demotion operations we first generate demotion operations
3978 from the source type to the intermediate types, and then combine the
3979 results (stored in VEC_OPRNDS) in demotion operation to the destination
3983 /* At each level of recursion we have half of the operands we had at the
3985 vec_oprnds
->truncate ((i
+1)/2);
3986 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3987 stmt
, vec_dsts
, gsi
, slp_node
,
3988 VEC_PACK_TRUNC_EXPR
,
3992 vec_dsts
.quick_push (vec_dest
);
3996 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3997 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3998 the resulting vectors and call the function recursively. */
4001 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4002 vec
<tree
> *vec_oprnds1
,
4003 gimple
*stmt
, tree vec_dest
,
4004 gimple_stmt_iterator
*gsi
,
4005 enum tree_code code1
,
4006 enum tree_code code2
, tree decl1
,
4007 tree decl2
, int op_type
)
4010 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4011 gimple
*new_stmt1
, *new_stmt2
;
4012 vec
<tree
> vec_tmp
= vNULL
;
4014 vec_tmp
.create (vec_oprnds0
->length () * 2);
4015 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4017 if (op_type
== binary_op
)
4018 vop1
= (*vec_oprnds1
)[i
];
4022 /* Generate the two halves of promotion operation. */
4023 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4024 op_type
, vec_dest
, gsi
, stmt
);
4025 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4026 op_type
, vec_dest
, gsi
, stmt
);
4027 if (is_gimple_call (new_stmt1
))
4029 new_tmp1
= gimple_call_lhs (new_stmt1
);
4030 new_tmp2
= gimple_call_lhs (new_stmt2
);
4034 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4035 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4038 /* Store the results for the next step. */
4039 vec_tmp
.quick_push (new_tmp1
);
4040 vec_tmp
.quick_push (new_tmp2
);
4043 vec_oprnds0
->release ();
4044 *vec_oprnds0
= vec_tmp
;
4048 /* Check if STMT performs a conversion operation, that can be vectorized.
4049 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4050 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4051 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4054 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4055 gimple
**vec_stmt
, slp_tree slp_node
)
4059 tree op0
, op1
= NULL_TREE
;
4060 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4061 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4062 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4063 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4064 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4065 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4068 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4070 gimple
*new_stmt
= NULL
;
4071 stmt_vec_info prev_stmt_info
;
4074 tree vectype_out
, vectype_in
;
4076 tree lhs_type
, rhs_type
;
4077 enum { NARROW
, NONE
, WIDEN
} modifier
;
4078 vec
<tree
> vec_oprnds0
= vNULL
;
4079 vec
<tree
> vec_oprnds1
= vNULL
;
4081 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4082 vec_info
*vinfo
= stmt_info
->vinfo
;
4083 int multi_step_cvt
= 0;
4084 vec
<tree
> interm_types
= vNULL
;
4085 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4087 unsigned short fltsz
;
4089 /* Is STMT a vectorizable conversion? */
4091 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4094 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4098 if (!is_gimple_assign (stmt
))
4101 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4104 code
= gimple_assign_rhs_code (stmt
);
4105 if (!CONVERT_EXPR_CODE_P (code
)
4106 && code
!= FIX_TRUNC_EXPR
4107 && code
!= FLOAT_EXPR
4108 && code
!= WIDEN_MULT_EXPR
4109 && code
!= WIDEN_LSHIFT_EXPR
)
4112 op_type
= TREE_CODE_LENGTH (code
);
4114 /* Check types of lhs and rhs. */
4115 scalar_dest
= gimple_assign_lhs (stmt
);
4116 lhs_type
= TREE_TYPE (scalar_dest
);
4117 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4119 op0
= gimple_assign_rhs1 (stmt
);
4120 rhs_type
= TREE_TYPE (op0
);
4122 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4123 && !((INTEGRAL_TYPE_P (lhs_type
)
4124 && INTEGRAL_TYPE_P (rhs_type
))
4125 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4126 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4129 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4130 && ((INTEGRAL_TYPE_P (lhs_type
)
4131 && !type_has_mode_precision_p (lhs_type
))
4132 || (INTEGRAL_TYPE_P (rhs_type
)
4133 && !type_has_mode_precision_p (rhs_type
))))
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4137 "type conversion to/from bit-precision unsupported."
4142 /* Check the operands of the operation. */
4143 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4147 "use not simple.\n");
4150 if (op_type
== binary_op
)
4154 op1
= gimple_assign_rhs2 (stmt
);
4155 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4156 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4158 if (CONSTANT_CLASS_P (op0
))
4159 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4161 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4165 if (dump_enabled_p ())
4166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4167 "use not simple.\n");
4172 /* If op0 is an external or constant defs use a vector type of
4173 the same size as the output vector type. */
4175 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4177 gcc_assert (vectype_in
);
4180 if (dump_enabled_p ())
4182 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4183 "no vectype for scalar type ");
4184 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4185 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4191 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4192 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4194 if (dump_enabled_p ())
4196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4197 "can't convert between boolean and non "
4199 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4200 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4206 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4207 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4208 if (nunits_in
< nunits_out
)
4210 else if (nunits_out
== nunits_in
)
4215 /* Multiple types in SLP are handled by creating the appropriate number of
4216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4220 else if (modifier
== NARROW
)
4221 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4223 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4225 /* Sanity check: make sure that at least one copy of the vectorized stmt
4226 needs to be generated. */
4227 gcc_assert (ncopies
>= 1);
4229 bool found_mode
= false;
4230 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4231 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4232 opt_scalar_mode rhs_mode_iter
;
4234 /* Supportable by target? */
4238 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4240 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4247 "conversion not supported by target.\n");
4251 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4252 &code1
, &code2
, &multi_step_cvt
,
4255 /* Binary widening operation can only be supported directly by the
4257 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4261 if (code
!= FLOAT_EXPR
4262 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4265 fltsz
= GET_MODE_SIZE (lhs_mode
);
4266 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4268 rhs_mode
= rhs_mode_iter
.require ();
4269 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4273 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4274 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4275 if (cvt_type
== NULL_TREE
)
4278 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4280 if (!supportable_convert_operation (code
, vectype_out
,
4281 cvt_type
, &decl1
, &codecvt1
))
4284 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4285 cvt_type
, &codecvt1
,
4286 &codecvt2
, &multi_step_cvt
,
4290 gcc_assert (multi_step_cvt
== 0);
4292 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4293 vectype_in
, &code1
, &code2
,
4294 &multi_step_cvt
, &interm_types
))
4304 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4305 codecvt2
= ERROR_MARK
;
4309 interm_types
.safe_push (cvt_type
);
4310 cvt_type
= NULL_TREE
;
4315 gcc_assert (op_type
== unary_op
);
4316 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4317 &code1
, &multi_step_cvt
,
4321 if (code
!= FIX_TRUNC_EXPR
4322 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4326 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4327 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4328 if (cvt_type
== NULL_TREE
)
4330 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4333 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4334 &code1
, &multi_step_cvt
,
4343 if (!vec_stmt
) /* transformation not required. */
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_NOTE
, vect_location
,
4347 "=== vectorizable_conversion ===\n");
4348 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4350 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4351 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4353 else if (modifier
== NARROW
)
4355 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4356 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4360 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4361 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4363 interm_types
.release ();
4368 if (dump_enabled_p ())
4369 dump_printf_loc (MSG_NOTE
, vect_location
,
4370 "transform conversion. ncopies = %d.\n", ncopies
);
4372 if (op_type
== binary_op
)
4374 if (CONSTANT_CLASS_P (op0
))
4375 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4376 else if (CONSTANT_CLASS_P (op1
))
4377 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4380 /* In case of multi-step conversion, we first generate conversion operations
4381 to the intermediate types, and then from that types to the final one.
4382 We create vector destinations for the intermediate type (TYPES) received
4383 from supportable_*_operation, and store them in the correct order
4384 for future use in vect_create_vectorized_*_stmts (). */
4385 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4386 vec_dest
= vect_create_destination_var (scalar_dest
,
4387 (cvt_type
&& modifier
== WIDEN
)
4388 ? cvt_type
: vectype_out
);
4389 vec_dsts
.quick_push (vec_dest
);
4393 for (i
= interm_types
.length () - 1;
4394 interm_types
.iterate (i
, &intermediate_type
); i
--)
4396 vec_dest
= vect_create_destination_var (scalar_dest
,
4398 vec_dsts
.quick_push (vec_dest
);
4403 vec_dest
= vect_create_destination_var (scalar_dest
,
4405 ? vectype_out
: cvt_type
);
4409 if (modifier
== WIDEN
)
4411 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4412 if (op_type
== binary_op
)
4413 vec_oprnds1
.create (1);
4415 else if (modifier
== NARROW
)
4416 vec_oprnds0
.create (
4417 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4419 else if (code
== WIDEN_LSHIFT_EXPR
)
4420 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4423 prev_stmt_info
= NULL
;
4427 for (j
= 0; j
< ncopies
; j
++)
4430 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4432 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4434 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4436 /* Arguments are ready, create the new vector stmt. */
4437 if (code1
== CALL_EXPR
)
4439 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4440 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4441 gimple_call_set_lhs (new_stmt
, new_temp
);
4445 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4446 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4447 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4448 gimple_assign_set_lhs (new_stmt
, new_temp
);
4451 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4453 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4456 if (!prev_stmt_info
)
4457 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4459 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4460 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4467 /* In case the vectorization factor (VF) is bigger than the number
4468 of elements that we can fit in a vectype (nunits), we have to
4469 generate more than one vector stmt - i.e - we need to "unroll"
4470 the vector stmt by a factor VF/nunits. */
4471 for (j
= 0; j
< ncopies
; j
++)
4478 if (code
== WIDEN_LSHIFT_EXPR
)
4483 /* Store vec_oprnd1 for every vector stmt to be created
4484 for SLP_NODE. We check during the analysis that all
4485 the shift arguments are the same. */
4486 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4487 vec_oprnds1
.quick_push (vec_oprnd1
);
4489 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4493 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4494 &vec_oprnds1
, slp_node
);
4498 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4499 vec_oprnds0
.quick_push (vec_oprnd0
);
4500 if (op_type
== binary_op
)
4502 if (code
== WIDEN_LSHIFT_EXPR
)
4505 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4506 vec_oprnds1
.quick_push (vec_oprnd1
);
4512 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4513 vec_oprnds0
.truncate (0);
4514 vec_oprnds0
.quick_push (vec_oprnd0
);
4515 if (op_type
== binary_op
)
4517 if (code
== WIDEN_LSHIFT_EXPR
)
4520 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4522 vec_oprnds1
.truncate (0);
4523 vec_oprnds1
.quick_push (vec_oprnd1
);
4527 /* Arguments are ready. Create the new vector stmts. */
4528 for (i
= multi_step_cvt
; i
>= 0; i
--)
4530 tree this_dest
= vec_dsts
[i
];
4531 enum tree_code c1
= code1
, c2
= code2
;
4532 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4537 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4539 stmt
, this_dest
, gsi
,
4540 c1
, c2
, decl1
, decl2
,
4544 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4548 if (codecvt1
== CALL_EXPR
)
4550 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4551 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4552 gimple_call_set_lhs (new_stmt
, new_temp
);
4556 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4557 new_temp
= make_ssa_name (vec_dest
);
4558 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4562 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4565 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4568 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4571 if (!prev_stmt_info
)
4572 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4574 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4575 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4580 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4584 /* In case the vectorization factor (VF) is bigger than the number
4585 of elements that we can fit in a vectype (nunits), we have to
4586 generate more than one vector stmt - i.e - we need to "unroll"
4587 the vector stmt by a factor VF/nunits. */
4588 for (j
= 0; j
< ncopies
; j
++)
4592 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4596 vec_oprnds0
.truncate (0);
4597 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4598 vect_pow2 (multi_step_cvt
) - 1);
4601 /* Arguments are ready. Create the new vector stmts. */
4603 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4605 if (codecvt1
== CALL_EXPR
)
4607 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4608 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4609 gimple_call_set_lhs (new_stmt
, new_temp
);
4613 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4614 new_temp
= make_ssa_name (vec_dest
);
4615 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4619 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4620 vec_oprnds0
[i
] = new_temp
;
4623 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4624 stmt
, vec_dsts
, gsi
,
4629 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4633 vec_oprnds0
.release ();
4634 vec_oprnds1
.release ();
4635 interm_types
.release ();
4641 /* Function vectorizable_assignment.
4643 Check if STMT performs an assignment (copy) that can be vectorized.
4644 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4645 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4646 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4649 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4650 gimple
**vec_stmt
, slp_tree slp_node
)
4655 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4656 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4659 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4663 vec
<tree
> vec_oprnds
= vNULL
;
4665 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4666 vec_info
*vinfo
= stmt_info
->vinfo
;
4667 gimple
*new_stmt
= NULL
;
4668 stmt_vec_info prev_stmt_info
= NULL
;
4669 enum tree_code code
;
4672 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4675 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4679 /* Is vectorizable assignment? */
4680 if (!is_gimple_assign (stmt
))
4683 scalar_dest
= gimple_assign_lhs (stmt
);
4684 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4687 code
= gimple_assign_rhs_code (stmt
);
4688 if (gimple_assign_single_p (stmt
)
4689 || code
== PAREN_EXPR
4690 || CONVERT_EXPR_CODE_P (code
))
4691 op
= gimple_assign_rhs1 (stmt
);
4695 if (code
== VIEW_CONVERT_EXPR
)
4696 op
= TREE_OPERAND (op
, 0);
4698 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4699 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4701 /* Multiple types in SLP are handled by creating the appropriate number of
4702 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4707 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4709 gcc_assert (ncopies
>= 1);
4711 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4713 if (dump_enabled_p ())
4714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4715 "use not simple.\n");
4719 /* We can handle NOP_EXPR conversions that do not change the number
4720 of elements or the vector size. */
4721 if ((CONVERT_EXPR_CODE_P (code
)
4722 || code
== VIEW_CONVERT_EXPR
)
4724 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4725 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4726 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4729 /* We do not handle bit-precision changes. */
4730 if ((CONVERT_EXPR_CODE_P (code
)
4731 || code
== VIEW_CONVERT_EXPR
)
4732 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4733 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
4734 || !type_has_mode_precision_p (TREE_TYPE (op
)))
4735 /* But a conversion that does not change the bit-pattern is ok. */
4736 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4737 > TYPE_PRECISION (TREE_TYPE (op
)))
4738 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4739 /* Conversion between boolean types of different sizes is
4740 a simple assignment in case their vectypes are same
4742 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4743 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4747 "type conversion to/from bit-precision "
4752 if (!vec_stmt
) /* transformation not required. */
4754 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4755 if (dump_enabled_p ())
4756 dump_printf_loc (MSG_NOTE
, vect_location
,
4757 "=== vectorizable_assignment ===\n");
4758 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4767 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4770 for (j
= 0; j
< ncopies
; j
++)
4774 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
4776 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4778 /* Arguments are ready. create the new vector stmt. */
4779 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4781 if (CONVERT_EXPR_CODE_P (code
)
4782 || code
== VIEW_CONVERT_EXPR
)
4783 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4784 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4785 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4786 gimple_assign_set_lhs (new_stmt
, new_temp
);
4787 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4789 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4796 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4798 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4800 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4803 vec_oprnds
.release ();
4808 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4809 either as shift by a scalar or by a vector. */
4812 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4815 machine_mode vec_mode
;
4820 vectype
= get_vectype_for_scalar_type (scalar_type
);
4824 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4826 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4828 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4830 || (optab_handler (optab
, TYPE_MODE (vectype
))
4831 == CODE_FOR_nothing
))
4835 vec_mode
= TYPE_MODE (vectype
);
4836 icode
= (int) optab_handler (optab
, vec_mode
);
4837 if (icode
== CODE_FOR_nothing
)
4844 /* Function vectorizable_shift.
4846 Check if STMT performs a shift operation that can be vectorized.
4847 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4848 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4849 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4852 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4853 gimple
**vec_stmt
, slp_tree slp_node
)
4857 tree op0
, op1
= NULL
;
4858 tree vec_oprnd1
= NULL_TREE
;
4859 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4861 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4862 enum tree_code code
;
4863 machine_mode vec_mode
;
4867 machine_mode optab_op2_mode
;
4869 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4871 gimple
*new_stmt
= NULL
;
4872 stmt_vec_info prev_stmt_info
;
4879 vec
<tree
> vec_oprnds0
= vNULL
;
4880 vec
<tree
> vec_oprnds1
= vNULL
;
4883 bool scalar_shift_arg
= true;
4884 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4885 vec_info
*vinfo
= stmt_info
->vinfo
;
4887 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4890 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4894 /* Is STMT a vectorizable binary/unary operation? */
4895 if (!is_gimple_assign (stmt
))
4898 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4901 code
= gimple_assign_rhs_code (stmt
);
4903 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4904 || code
== RROTATE_EXPR
))
4907 scalar_dest
= gimple_assign_lhs (stmt
);
4908 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4909 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
4911 if (dump_enabled_p ())
4912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4913 "bit-precision shifts not supported.\n");
4917 op0
= gimple_assign_rhs1 (stmt
);
4918 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4922 "use not simple.\n");
4925 /* If op0 is an external or constant def use a vector type with
4926 the same size as the output vector type. */
4928 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4930 gcc_assert (vectype
);
4933 if (dump_enabled_p ())
4934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4935 "no vectype for scalar type\n");
4939 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4940 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4941 if (nunits_out
!= nunits_in
)
4944 op1
= gimple_assign_rhs2 (stmt
);
4945 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4947 if (dump_enabled_p ())
4948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4949 "use not simple.\n");
4953 /* Multiple types in SLP are handled by creating the appropriate number of
4954 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4959 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4961 gcc_assert (ncopies
>= 1);
4963 /* Determine whether the shift amount is a vector, or scalar. If the
4964 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4966 if ((dt
[1] == vect_internal_def
4967 || dt
[1] == vect_induction_def
)
4969 scalar_shift_arg
= false;
4970 else if (dt
[1] == vect_constant_def
4971 || dt
[1] == vect_external_def
4972 || dt
[1] == vect_internal_def
)
4974 /* In SLP, need to check whether the shift count is the same,
4975 in loops if it is a constant or invariant, it is always
4979 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4982 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4983 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4984 scalar_shift_arg
= false;
4987 /* If the shift amount is computed by a pattern stmt we cannot
4988 use the scalar amount directly thus give up and use a vector
4990 if (dt
[1] == vect_internal_def
)
4992 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4993 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4994 scalar_shift_arg
= false;
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5001 "operand mode requires invariant argument.\n");
5005 /* Vector shifted by vector. */
5006 if (!scalar_shift_arg
)
5008 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5009 if (dump_enabled_p ())
5010 dump_printf_loc (MSG_NOTE
, vect_location
,
5011 "vector/vector shift/rotate found.\n");
5014 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5015 if (op1_vectype
== NULL_TREE
5016 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5018 if (dump_enabled_p ())
5019 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5020 "unusable type for last operand in"
5021 " vector/vector shift/rotate.\n");
5025 /* See if the machine has a vector shifted by scalar insn and if not
5026 then see if it has a vector shifted by vector insn. */
5029 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5031 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5033 if (dump_enabled_p ())
5034 dump_printf_loc (MSG_NOTE
, vect_location
,
5035 "vector/scalar shift/rotate found.\n");
5039 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5041 && (optab_handler (optab
, TYPE_MODE (vectype
))
5042 != CODE_FOR_nothing
))
5044 scalar_shift_arg
= false;
5046 if (dump_enabled_p ())
5047 dump_printf_loc (MSG_NOTE
, vect_location
,
5048 "vector/vector shift/rotate found.\n");
5050 /* Unlike the other binary operators, shifts/rotates have
5051 the rhs being int, instead of the same type as the lhs,
5052 so make sure the scalar is the right type if we are
5053 dealing with vectors of long long/long/short/char. */
5054 if (dt
[1] == vect_constant_def
)
5055 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5056 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5060 && TYPE_MODE (TREE_TYPE (vectype
))
5061 != TYPE_MODE (TREE_TYPE (op1
)))
5063 if (dump_enabled_p ())
5064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5065 "unusable type for last operand in"
5066 " vector/vector shift/rotate.\n");
5069 if (vec_stmt
&& !slp_node
)
5071 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5072 op1
= vect_init_vector (stmt
, op1
,
5073 TREE_TYPE (vectype
), NULL
);
5080 /* Supportable by target? */
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5088 vec_mode
= TYPE_MODE (vectype
);
5089 icode
= (int) optab_handler (optab
, vec_mode
);
5090 if (icode
== CODE_FOR_nothing
)
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5094 "op not supported by target.\n");
5095 /* Check only during analysis. */
5096 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5098 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_NOTE
, vect_location
,
5102 "proceeding using word mode.\n");
5105 /* Worthwhile without SIMD support? Check only during analysis. */
5107 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5108 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5110 if (dump_enabled_p ())
5111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5112 "not worthwhile without SIMD support.\n");
5116 if (!vec_stmt
) /* transformation not required. */
5118 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_NOTE
, vect_location
,
5121 "=== vectorizable_shift ===\n");
5122 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_NOTE
, vect_location
,
5130 "transform binary/unary operation.\n");
5133 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5135 prev_stmt_info
= NULL
;
5136 for (j
= 0; j
< ncopies
; j
++)
5141 if (scalar_shift_arg
)
5143 /* Vector shl and shr insn patterns can be defined with scalar
5144 operand 2 (shift operand). In this case, use constant or loop
5145 invariant op1 directly, without extending it to vector mode
5147 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5148 if (!VECTOR_MODE_P (optab_op2_mode
))
5150 if (dump_enabled_p ())
5151 dump_printf_loc (MSG_NOTE
, vect_location
,
5152 "operand 1 using scalar mode.\n");
5154 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5155 vec_oprnds1
.quick_push (vec_oprnd1
);
5158 /* Store vec_oprnd1 for every vector stmt to be created
5159 for SLP_NODE. We check during the analysis that all
5160 the shift arguments are the same.
5161 TODO: Allow different constants for different vector
5162 stmts generated for an SLP instance. */
5163 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5164 vec_oprnds1
.quick_push (vec_oprnd1
);
5169 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5170 (a special case for certain kind of vector shifts); otherwise,
5171 operand 1 should be of a vector type (the usual case). */
5173 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5176 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5180 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5182 /* Arguments are ready. Create the new vector stmt. */
5183 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5185 vop1
= vec_oprnds1
[i
];
5186 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5187 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5188 gimple_assign_set_lhs (new_stmt
, new_temp
);
5189 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5191 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5198 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5200 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5201 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5204 vec_oprnds0
.release ();
5205 vec_oprnds1
.release ();
5211 /* Function vectorizable_operation.
5213 Check if STMT performs a binary, unary or ternary operation that can
5215 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5216 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5217 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5220 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5221 gimple
**vec_stmt
, slp_tree slp_node
)
5225 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5226 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5228 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5229 enum tree_code code
;
5230 machine_mode vec_mode
;
5234 bool target_support_p
;
5236 enum vect_def_type dt
[3]
5237 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5239 gimple
*new_stmt
= NULL
;
5240 stmt_vec_info prev_stmt_info
;
5246 vec
<tree
> vec_oprnds0
= vNULL
;
5247 vec
<tree
> vec_oprnds1
= vNULL
;
5248 vec
<tree
> vec_oprnds2
= vNULL
;
5249 tree vop0
, vop1
, vop2
;
5250 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5251 vec_info
*vinfo
= stmt_info
->vinfo
;
5253 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5256 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5260 /* Is STMT a vectorizable binary/unary operation? */
5261 if (!is_gimple_assign (stmt
))
5264 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5267 code
= gimple_assign_rhs_code (stmt
);
5269 /* For pointer addition and subtraction, we should use the normal
5270 plus and minus for the vector operation. */
5271 if (code
== POINTER_PLUS_EXPR
)
5273 if (code
== POINTER_DIFF_EXPR
)
5276 /* Support only unary or binary operations. */
5277 op_type
= TREE_CODE_LENGTH (code
);
5278 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5282 "num. args = %d (not unary/binary/ternary op).\n",
5287 scalar_dest
= gimple_assign_lhs (stmt
);
5288 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5290 /* Most operations cannot handle bit-precision types without extra
5292 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5293 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5294 /* Exception are bitwise binary operations. */
5295 && code
!= BIT_IOR_EXPR
5296 && code
!= BIT_XOR_EXPR
5297 && code
!= BIT_AND_EXPR
)
5299 if (dump_enabled_p ())
5300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5301 "bit-precision arithmetic not supported.\n");
5305 op0
= gimple_assign_rhs1 (stmt
);
5306 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5308 if (dump_enabled_p ())
5309 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5310 "use not simple.\n");
5313 /* If op0 is an external or constant def use a vector type with
5314 the same size as the output vector type. */
5317 /* For boolean type we cannot determine vectype by
5318 invariant value (don't know whether it is a vector
5319 of booleans or vector of integers). We use output
5320 vectype because operations on boolean don't change
5322 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5324 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5326 if (dump_enabled_p ())
5327 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5328 "not supported operation on bool value.\n");
5331 vectype
= vectype_out
;
5334 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5337 gcc_assert (vectype
);
5340 if (dump_enabled_p ())
5342 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5343 "no vectype for scalar type ");
5344 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5346 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5352 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5353 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5354 if (nunits_out
!= nunits_in
)
5357 if (op_type
== binary_op
|| op_type
== ternary_op
)
5359 op1
= gimple_assign_rhs2 (stmt
);
5360 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5364 "use not simple.\n");
5368 if (op_type
== ternary_op
)
5370 op2
= gimple_assign_rhs3 (stmt
);
5371 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5373 if (dump_enabled_p ())
5374 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5375 "use not simple.\n");
5380 /* Multiple types in SLP are handled by creating the appropriate number of
5381 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5386 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5388 gcc_assert (ncopies
>= 1);
5390 /* Shifts are handled in vectorizable_shift (). */
5391 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5392 || code
== RROTATE_EXPR
)
5395 /* Supportable by target? */
5397 vec_mode
= TYPE_MODE (vectype
);
5398 if (code
== MULT_HIGHPART_EXPR
)
5399 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5402 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5410 target_support_p
= (optab_handler (optab
, vec_mode
)
5411 != CODE_FOR_nothing
);
5414 if (!target_support_p
)
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5418 "op not supported by target.\n");
5419 /* Check only during analysis. */
5420 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5421 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_NOTE
, vect_location
,
5425 "proceeding using word mode.\n");
5428 /* Worthwhile without SIMD support? Check only during analysis. */
5429 if (!VECTOR_MODE_P (vec_mode
)
5431 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5433 if (dump_enabled_p ())
5434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5435 "not worthwhile without SIMD support.\n");
5439 if (!vec_stmt
) /* transformation not required. */
5441 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5442 if (dump_enabled_p ())
5443 dump_printf_loc (MSG_NOTE
, vect_location
,
5444 "=== vectorizable_operation ===\n");
5445 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5451 if (dump_enabled_p ())
5452 dump_printf_loc (MSG_NOTE
, vect_location
,
5453 "transform binary/unary operation.\n");
5456 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5458 /* In case the vectorization factor (VF) is bigger than the number
5459 of elements that we can fit in a vectype (nunits), we have to generate
5460 more than one vector stmt - i.e - we need to "unroll" the
5461 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5462 from one copy of the vector stmt to the next, in the field
5463 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5464 stages to find the correct vector defs to be used when vectorizing
5465 stmts that use the defs of the current stmt. The example below
5466 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5467 we need to create 4 vectorized stmts):
5469 before vectorization:
5470 RELATED_STMT VEC_STMT
5474 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5476 RELATED_STMT VEC_STMT
5477 VS1_0: vx0 = memref0 VS1_1 -
5478 VS1_1: vx1 = memref1 VS1_2 -
5479 VS1_2: vx2 = memref2 VS1_3 -
5480 VS1_3: vx3 = memref3 - -
5481 S1: x = load - VS1_0
5484 step2: vectorize stmt S2 (done here):
5485 To vectorize stmt S2 we first need to find the relevant vector
5486 def for the first operand 'x'. This is, as usual, obtained from
5487 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5488 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5489 relevant vector def 'vx0'. Having found 'vx0' we can generate
5490 the vector stmt VS2_0, and as usual, record it in the
5491 STMT_VINFO_VEC_STMT of stmt S2.
5492 When creating the second copy (VS2_1), we obtain the relevant vector
5493 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5494 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5495 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5496 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5497 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5498 chain of stmts and pointers:
5499 RELATED_STMT VEC_STMT
5500 VS1_0: vx0 = memref0 VS1_1 -
5501 VS1_1: vx1 = memref1 VS1_2 -
5502 VS1_2: vx2 = memref2 VS1_3 -
5503 VS1_3: vx3 = memref3 - -
5504 S1: x = load - VS1_0
5505 VS2_0: vz0 = vx0 + v1 VS2_1 -
5506 VS2_1: vz1 = vx1 + v1 VS2_2 -
5507 VS2_2: vz2 = vx2 + v1 VS2_3 -
5508 VS2_3: vz3 = vx3 + v1 - -
5509 S2: z = x + 1 - VS2_0 */
5511 prev_stmt_info
= NULL
;
5512 for (j
= 0; j
< ncopies
; j
++)
5517 if (op_type
== binary_op
|| op_type
== ternary_op
)
5518 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5521 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5523 if (op_type
== ternary_op
)
5524 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5529 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5530 if (op_type
== ternary_op
)
5532 tree vec_oprnd
= vec_oprnds2
.pop ();
5533 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5538 /* Arguments are ready. Create the new vector stmt. */
5539 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5541 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5542 ? vec_oprnds1
[i
] : NULL_TREE
);
5543 vop2
= ((op_type
== ternary_op
)
5544 ? vec_oprnds2
[i
] : NULL_TREE
);
5545 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5546 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5547 gimple_assign_set_lhs (new_stmt
, new_temp
);
5548 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5550 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5557 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5559 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5560 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5563 vec_oprnds0
.release ();
5564 vec_oprnds1
.release ();
5565 vec_oprnds2
.release ();
5570 /* A helper function to ensure data reference DR's base alignment. */
5573 ensure_base_align (struct data_reference
*dr
)
5578 if (DR_VECT_AUX (dr
)->base_misaligned
)
5580 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5582 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
5584 if (decl_in_symtab_p (base_decl
))
5585 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
5588 SET_DECL_ALIGN (base_decl
, align_base_to
);
5589 DECL_USER_ALIGN (base_decl
) = 1;
5591 DR_VECT_AUX (dr
)->base_misaligned
= false;
5596 /* Function get_group_alias_ptr_type.
5598 Return the alias type for the group starting at FIRST_STMT. */
5601 get_group_alias_ptr_type (gimple
*first_stmt
)
5603 struct data_reference
*first_dr
, *next_dr
;
5606 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5607 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5610 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5611 if (get_alias_set (DR_REF (first_dr
))
5612 != get_alias_set (DR_REF (next_dr
)))
5614 if (dump_enabled_p ())
5615 dump_printf_loc (MSG_NOTE
, vect_location
,
5616 "conflicting alias set types.\n");
5617 return ptr_type_node
;
5619 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5621 return reference_alias_ptr_type (DR_REF (first_dr
));
5625 /* Function vectorizable_store.
5627 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5629 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5630 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5631 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5634 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5640 tree vec_oprnd
= NULL_TREE
;
5641 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5642 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5644 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5645 struct loop
*loop
= NULL
;
5646 machine_mode vec_mode
;
5648 enum dr_alignment_support alignment_support_scheme
;
5650 enum vect_def_type dt
;
5651 stmt_vec_info prev_stmt_info
= NULL
;
5652 tree dataref_ptr
= NULL_TREE
;
5653 tree dataref_offset
= NULL_TREE
;
5654 gimple
*ptr_incr
= NULL
;
5657 gimple
*next_stmt
, *first_stmt
;
5659 unsigned int group_size
, i
;
5660 vec
<tree
> oprnds
= vNULL
;
5661 vec
<tree
> result_chain
= vNULL
;
5663 tree offset
= NULL_TREE
;
5664 vec
<tree
> vec_oprnds
= vNULL
;
5665 bool slp
= (slp_node
!= NULL
);
5666 unsigned int vec_num
;
5667 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5668 vec_info
*vinfo
= stmt_info
->vinfo
;
5670 gather_scatter_info gs_info
;
5671 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5674 vec_load_store_type vls_type
;
5677 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5680 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5684 /* Is vectorizable store? */
5686 if (!is_gimple_assign (stmt
))
5689 scalar_dest
= gimple_assign_lhs (stmt
);
5690 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5691 && is_pattern_stmt_p (stmt_info
))
5692 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5693 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5694 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5695 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5696 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5697 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5698 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5699 && TREE_CODE (scalar_dest
) != MEM_REF
)
5702 /* Cannot have hybrid store SLP -- that would mean storing to the
5703 same location twice. */
5704 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5706 gcc_assert (gimple_assign_single_p (stmt
));
5708 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5709 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5713 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5714 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5719 /* Multiple types in SLP are handled by creating the appropriate number of
5720 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5725 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5727 gcc_assert (ncopies
>= 1);
5729 /* FORNOW. This restriction should be relaxed. */
5730 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5732 if (dump_enabled_p ())
5733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5734 "multiple types in nested loop.\n");
5738 op
= gimple_assign_rhs1 (stmt
);
5740 /* In the case this is a store from a constant make sure
5741 native_encode_expr can handle it. */
5742 if (CONSTANT_CLASS_P (op
) && native_encode_expr (op
, NULL
, 64) == 0)
5745 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5749 "use not simple.\n");
5753 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5754 vls_type
= VLS_STORE_INVARIANT
;
5756 vls_type
= VLS_STORE
;
5758 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5761 elem_type
= TREE_TYPE (vectype
);
5762 vec_mode
= TYPE_MODE (vectype
);
5764 /* FORNOW. In some cases can vectorize even if data-type not supported
5765 (e.g. - array initialization with 0). */
5766 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5769 if (!STMT_VINFO_DATA_REF (stmt_info
))
5772 vect_memory_access_type memory_access_type
;
5773 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5774 &memory_access_type
, &gs_info
))
5777 if (!vec_stmt
) /* transformation not required. */
5779 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5780 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5781 /* The SLP costs are calculated during SLP analysis. */
5782 if (!PURE_SLP_STMT (stmt_info
))
5783 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5787 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5791 ensure_base_align (dr
);
5793 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5795 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5796 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5797 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5798 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5799 edge pe
= loop_preheader_edge (loop
);
5802 enum { NARROW
, NONE
, WIDEN
} modifier
;
5803 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5805 if (nunits
== (unsigned int) scatter_off_nunits
)
5807 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5811 auto_vec_perm_indices
sel (scatter_off_nunits
);
5812 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5813 sel
.quick_push (i
| nunits
);
5815 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5816 gcc_assert (perm_mask
!= NULL_TREE
);
5818 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5822 auto_vec_perm_indices
sel (nunits
);
5823 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5824 sel
.quick_push (i
| scatter_off_nunits
);
5826 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5827 gcc_assert (perm_mask
!= NULL_TREE
);
5833 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5834 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5835 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5836 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5837 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5838 scaletype
= TREE_VALUE (arglist
);
5840 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5841 && TREE_CODE (rettype
) == VOID_TYPE
);
5843 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5844 if (!is_gimple_min_invariant (ptr
))
5846 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5847 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5848 gcc_assert (!new_bb
);
5851 /* Currently we support only unconditional scatter stores,
5852 so mask should be all ones. */
5853 mask
= build_int_cst (masktype
, -1);
5854 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5856 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5858 prev_stmt_info
= NULL
;
5859 for (j
= 0; j
< ncopies
; ++j
)
5864 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5866 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5868 else if (modifier
!= NONE
&& (j
& 1))
5870 if (modifier
== WIDEN
)
5873 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5874 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5877 else if (modifier
== NARROW
)
5879 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5882 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5891 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5893 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5897 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5899 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5900 == TYPE_VECTOR_SUBPARTS (srctype
));
5901 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5902 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5903 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5904 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5908 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5910 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5911 == TYPE_VECTOR_SUBPARTS (idxtype
));
5912 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5913 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5914 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5915 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5920 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5922 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5924 if (prev_stmt_info
== NULL
)
5925 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5927 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5928 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5933 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5936 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5937 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5938 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5940 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5943 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5945 /* We vectorize all the stmts of the interleaving group when we
5946 reach the last stmt in the group. */
5947 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5948 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5957 grouped_store
= false;
5958 /* VEC_NUM is the number of vect stmts to be created for this
5960 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5961 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5962 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5963 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5964 op
= gimple_assign_rhs1 (first_stmt
);
5967 /* VEC_NUM is the number of vect stmts to be created for this
5969 vec_num
= group_size
;
5971 ref_type
= get_group_alias_ptr_type (first_stmt
);
5977 group_size
= vec_num
= 1;
5978 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5981 if (dump_enabled_p ())
5982 dump_printf_loc (MSG_NOTE
, vect_location
,
5983 "transform store. ncopies = %d\n", ncopies
);
5985 if (memory_access_type
== VMAT_ELEMENTWISE
5986 || memory_access_type
== VMAT_STRIDED_SLP
)
5988 gimple_stmt_iterator incr_gsi
;
5994 gimple_seq stmts
= NULL
;
5995 tree stride_base
, stride_step
, alias_off
;
5999 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6002 = fold_build_pointer_plus
6003 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6004 size_binop (PLUS_EXPR
,
6005 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6006 convert_to_ptrofftype (DR_INIT (first_dr
))));
6007 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
6009 /* For a store with loop-invariant (but other than power-of-2)
6010 stride (i.e. not a grouped access) like so:
6012 for (i = 0; i < n; i += stride)
6015 we generate a new induction variable and new stores from
6016 the components of the (vectorized) rhs:
6018 for (j = 0; ; j += VF*stride)
6023 array[j + stride] = tmp2;
6027 unsigned nstores
= nunits
;
6029 tree ltype
= elem_type
;
6030 tree lvectype
= vectype
;
6033 if (group_size
< nunits
6034 && nunits
% group_size
== 0)
6036 nstores
= nunits
/ group_size
;
6038 ltype
= build_vector_type (elem_type
, group_size
);
6041 /* First check if vec_extract optab doesn't support extraction
6042 of vector elts directly. */
6043 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6045 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6046 || !VECTOR_MODE_P (vmode
)
6047 || (convert_optab_handler (vec_extract_optab
,
6048 TYPE_MODE (vectype
), vmode
)
6049 == CODE_FOR_nothing
))
6051 /* Try to avoid emitting an extract of vector elements
6052 by performing the extracts using an integer type of the
6053 same size, extracting from a vector of those and then
6054 re-interpreting it as the original vector type if
6057 = group_size
* GET_MODE_BITSIZE (elmode
);
6058 elmode
= int_mode_for_size (lsize
, 0).require ();
6059 /* If we can't construct such a vector fall back to
6060 element extracts from the original vector type and
6061 element size stores. */
6062 if (mode_for_vector (elmode
,
6063 nunits
/ group_size
).exists (&vmode
)
6064 && VECTOR_MODE_P (vmode
)
6065 && (convert_optab_handler (vec_extract_optab
,
6067 != CODE_FOR_nothing
))
6069 nstores
= nunits
/ group_size
;
6071 ltype
= build_nonstandard_integer_type (lsize
, 1);
6072 lvectype
= build_vector_type (ltype
, nstores
);
6074 /* Else fall back to vector extraction anyway.
6075 Fewer stores are more important than avoiding spilling
6076 of the vector we extract from. Compared to the
6077 construction case in vectorizable_load no store-forwarding
6078 issue exists here for reasonable archs. */
6081 else if (group_size
>= nunits
6082 && group_size
% nunits
== 0)
6089 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6090 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6093 ivstep
= stride_step
;
6094 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6095 build_int_cst (TREE_TYPE (ivstep
), vf
));
6097 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6099 create_iv (stride_base
, ivstep
, NULL
,
6100 loop
, &incr_gsi
, insert_after
,
6102 incr
= gsi_stmt (incr_gsi
);
6103 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6105 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6107 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6109 prev_stmt_info
= NULL
;
6110 alias_off
= build_int_cst (ref_type
, 0);
6111 next_stmt
= first_stmt
;
6112 for (g
= 0; g
< group_size
; g
++)
6114 running_off
= offvar
;
6117 tree size
= TYPE_SIZE_UNIT (ltype
);
6118 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6120 tree newoff
= copy_ssa_name (running_off
, NULL
);
6121 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6123 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6124 running_off
= newoff
;
6126 unsigned int group_el
= 0;
6127 unsigned HOST_WIDE_INT
6128 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6129 for (j
= 0; j
< ncopies
; j
++)
6131 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6132 and first_stmt == stmt. */
6137 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6139 vec_oprnd
= vec_oprnds
[0];
6143 gcc_assert (gimple_assign_single_p (next_stmt
));
6144 op
= gimple_assign_rhs1 (next_stmt
);
6145 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6151 vec_oprnd
= vec_oprnds
[j
];
6154 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6155 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6158 /* Pun the vector to extract from if necessary. */
6159 if (lvectype
!= vectype
)
6161 tree tem
= make_ssa_name (lvectype
);
6163 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6164 lvectype
, vec_oprnd
));
6165 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6168 for (i
= 0; i
< nstores
; i
++)
6170 tree newref
, newoff
;
6171 gimple
*incr
, *assign
;
6172 tree size
= TYPE_SIZE (ltype
);
6173 /* Extract the i'th component. */
6174 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6175 bitsize_int (i
), size
);
6176 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6179 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6183 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6185 newref
= build2 (MEM_REF
, ltype
,
6186 running_off
, this_off
);
6188 /* And store it to *running_off. */
6189 assign
= gimple_build_assign (newref
, elem
);
6190 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6194 || group_el
== group_size
)
6196 newoff
= copy_ssa_name (running_off
, NULL
);
6197 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6198 running_off
, stride_step
);
6199 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6201 running_off
= newoff
;
6204 if (g
== group_size
- 1
6207 if (j
== 0 && i
== 0)
6208 STMT_VINFO_VEC_STMT (stmt_info
)
6209 = *vec_stmt
= assign
;
6211 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6212 prev_stmt_info
= vinfo_for_stmt (assign
);
6216 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6221 vec_oprnds
.release ();
6225 auto_vec
<tree
> dr_chain (group_size
);
6226 oprnds
.create (group_size
);
6228 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6229 gcc_assert (alignment_support_scheme
);
6230 /* Targets with store-lane instructions must not require explicit
6232 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6233 || alignment_support_scheme
== dr_aligned
6234 || alignment_support_scheme
== dr_unaligned_supported
);
6236 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6237 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6238 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6240 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6241 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6243 aggr_type
= vectype
;
6245 /* In case the vectorization factor (VF) is bigger than the number
6246 of elements that we can fit in a vectype (nunits), we have to generate
6247 more than one vector stmt - i.e - we need to "unroll" the
6248 vector stmt by a factor VF/nunits. For more details see documentation in
6249 vect_get_vec_def_for_copy_stmt. */
6251 /* In case of interleaving (non-unit grouped access):
6258 We create vectorized stores starting from base address (the access of the
6259 first stmt in the chain (S2 in the above example), when the last store stmt
6260 of the chain (S4) is reached:
6263 VS2: &base + vec_size*1 = vx0
6264 VS3: &base + vec_size*2 = vx1
6265 VS4: &base + vec_size*3 = vx3
6267 Then permutation statements are generated:
6269 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6270 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6273 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6274 (the order of the data-refs in the output of vect_permute_store_chain
6275 corresponds to the order of scalar stmts in the interleaving chain - see
6276 the documentation of vect_permute_store_chain()).
6278 In case of both multiple types and interleaving, above vector stores and
6279 permutation stmts are created for every copy. The result vector stmts are
6280 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6281 STMT_VINFO_RELATED_STMT for the next copies.
6284 prev_stmt_info
= NULL
;
6285 for (j
= 0; j
< ncopies
; j
++)
6292 /* Get vectorized arguments for SLP_NODE. */
6293 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6296 vec_oprnd
= vec_oprnds
[0];
6300 /* For interleaved stores we collect vectorized defs for all the
6301 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6302 used as an input to vect_permute_store_chain(), and OPRNDS as
6303 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6305 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6306 OPRNDS are of size 1. */
6307 next_stmt
= first_stmt
;
6308 for (i
= 0; i
< group_size
; i
++)
6310 /* Since gaps are not supported for interleaved stores,
6311 GROUP_SIZE is the exact number of stmts in the chain.
6312 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6313 there is no interleaving, GROUP_SIZE is 1, and only one
6314 iteration of the loop will be executed. */
6315 gcc_assert (next_stmt
6316 && gimple_assign_single_p (next_stmt
));
6317 op
= gimple_assign_rhs1 (next_stmt
);
6319 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6320 dr_chain
.quick_push (vec_oprnd
);
6321 oprnds
.quick_push (vec_oprnd
);
6322 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6326 /* We should have catched mismatched types earlier. */
6327 gcc_assert (useless_type_conversion_p (vectype
,
6328 TREE_TYPE (vec_oprnd
)));
6329 bool simd_lane_access_p
6330 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6331 if (simd_lane_access_p
6332 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6333 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6334 && integer_zerop (DR_OFFSET (first_dr
))
6335 && integer_zerop (DR_INIT (first_dr
))
6336 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6337 get_alias_set (TREE_TYPE (ref_type
))))
6339 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6340 dataref_offset
= build_int_cst (ref_type
, 0);
6345 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6346 simd_lane_access_p
? loop
: NULL
,
6347 offset
, &dummy
, gsi
, &ptr_incr
,
6348 simd_lane_access_p
, &inv_p
);
6349 gcc_assert (bb_vinfo
|| !inv_p
);
6353 /* For interleaved stores we created vectorized defs for all the
6354 defs stored in OPRNDS in the previous iteration (previous copy).
6355 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6356 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6358 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6359 OPRNDS are of size 1. */
6360 for (i
= 0; i
< group_size
; i
++)
6363 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6364 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6365 dr_chain
[i
] = vec_oprnd
;
6366 oprnds
[i
] = vec_oprnd
;
6370 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6371 TYPE_SIZE_UNIT (aggr_type
));
6373 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6374 TYPE_SIZE_UNIT (aggr_type
));
6377 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6381 /* Combine all the vectors into an array. */
6382 vec_array
= create_vector_array (vectype
, vec_num
);
6383 for (i
= 0; i
< vec_num
; i
++)
6385 vec_oprnd
= dr_chain
[i
];
6386 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6390 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6391 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6392 gcall
*call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6394 gimple_call_set_lhs (call
, data_ref
);
6395 gimple_call_set_nothrow (call
, true);
6397 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6405 result_chain
.create (group_size
);
6407 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6411 next_stmt
= first_stmt
;
6412 for (i
= 0; i
< vec_num
; i
++)
6414 unsigned align
, misalign
;
6417 /* Bump the vector pointer. */
6418 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6422 vec_oprnd
= vec_oprnds
[i
];
6423 else if (grouped_store
)
6424 /* For grouped stores vectorized defs are interleaved in
6425 vect_permute_store_chain(). */
6426 vec_oprnd
= result_chain
[i
];
6428 data_ref
= fold_build2 (MEM_REF
, vectype
,
6432 : build_int_cst (ref_type
, 0));
6433 align
= DR_TARGET_ALIGNMENT (first_dr
);
6434 if (aligned_access_p (first_dr
))
6436 else if (DR_MISALIGNMENT (first_dr
) == -1)
6438 align
= dr_alignment (vect_dr_behavior (first_dr
));
6440 TREE_TYPE (data_ref
)
6441 = build_aligned_type (TREE_TYPE (data_ref
),
6442 align
* BITS_PER_UNIT
);
6446 TREE_TYPE (data_ref
)
6447 = build_aligned_type (TREE_TYPE (data_ref
),
6448 TYPE_ALIGN (elem_type
));
6449 misalign
= DR_MISALIGNMENT (first_dr
);
6451 if (dataref_offset
== NULL_TREE
6452 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6453 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6456 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6458 tree perm_mask
= perm_mask_for_reverse (vectype
);
6460 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6462 tree new_temp
= make_ssa_name (perm_dest
);
6464 /* Generate the permute statement. */
6466 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6467 vec_oprnd
, perm_mask
);
6468 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6470 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6471 vec_oprnd
= new_temp
;
6474 /* Arguments are ready. Create the new vector stmt. */
6475 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6476 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6481 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6489 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6491 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6492 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6497 result_chain
.release ();
6498 vec_oprnds
.release ();
6503 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6504 VECTOR_CST mask. No checks are made that the target platform supports the
6505 mask, so callers may wish to test can_vec_perm_p separately, or use
6506 vect_gen_perm_mask_checked. */
6509 vect_gen_perm_mask_any (tree vectype
, vec_perm_indices sel
)
6511 tree mask_elt_type
, mask_type
;
6513 unsigned int nunits
= sel
.length ();
6514 gcc_checking_assert (nunits
== TYPE_VECTOR_SUBPARTS (vectype
));
6516 mask_elt_type
= lang_hooks
.types
.type_for_mode
6517 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
6518 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6520 tree_vector_builder
mask_elts (mask_type
, nunits
, 1);
6521 for (unsigned int i
= 0; i
< nunits
; ++i
)
6522 mask_elts
.quick_push (build_int_cst (mask_elt_type
, sel
[i
]));
6523 return mask_elts
.build ();
6526 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6527 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6530 vect_gen_perm_mask_checked (tree vectype
, vec_perm_indices sel
)
6532 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, &sel
));
6533 return vect_gen_perm_mask_any (vectype
, sel
);
6536 /* Given a vector variable X and Y, that was generated for the scalar
6537 STMT, generate instructions to permute the vector elements of X and Y
6538 using permutation mask MASK_VEC, insert them at *GSI and return the
6539 permuted vector variable. */
6542 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6543 gimple_stmt_iterator
*gsi
)
6545 tree vectype
= TREE_TYPE (x
);
6546 tree perm_dest
, data_ref
;
6549 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6550 data_ref
= make_ssa_name (perm_dest
);
6552 /* Generate the permute statement. */
6553 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6554 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6559 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6560 inserting them on the loops preheader edge. Returns true if we
6561 were successful in doing so (and thus STMT can be moved then),
6562 otherwise returns false. */
6565 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6571 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6573 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6574 if (!gimple_nop_p (def_stmt
)
6575 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6577 /* Make sure we don't need to recurse. While we could do
6578 so in simple cases when there are more complex use webs
6579 we don't have an easy way to preserve stmt order to fulfil
6580 dependencies within them. */
6583 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6585 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6587 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6588 if (!gimple_nop_p (def_stmt2
)
6589 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6599 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6601 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6602 if (!gimple_nop_p (def_stmt
)
6603 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6605 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6606 gsi_remove (&gsi
, false);
6607 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6614 /* vectorizable_load.
6616 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6618 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6619 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6620 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6623 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6624 slp_tree slp_node
, slp_instance slp_node_instance
)
6627 tree vec_dest
= NULL
;
6628 tree data_ref
= NULL
;
6629 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6630 stmt_vec_info prev_stmt_info
;
6631 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6632 struct loop
*loop
= NULL
;
6633 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6634 bool nested_in_vect_loop
= false;
6635 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6639 gimple
*new_stmt
= NULL
;
6641 enum dr_alignment_support alignment_support_scheme
;
6642 tree dataref_ptr
= NULL_TREE
;
6643 tree dataref_offset
= NULL_TREE
;
6644 gimple
*ptr_incr
= NULL
;
6646 int i
, j
, group_size
, group_gap_adj
;
6647 tree msq
= NULL_TREE
, lsq
;
6648 tree offset
= NULL_TREE
;
6649 tree byte_offset
= NULL_TREE
;
6650 tree realignment_token
= NULL_TREE
;
6652 vec
<tree
> dr_chain
= vNULL
;
6653 bool grouped_load
= false;
6655 gimple
*first_stmt_for_drptr
= NULL
;
6657 bool compute_in_loop
= false;
6658 struct loop
*at_loop
;
6660 bool slp
= (slp_node
!= NULL
);
6661 bool slp_perm
= false;
6662 enum tree_code code
;
6663 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6666 gather_scatter_info gs_info
;
6667 vec_info
*vinfo
= stmt_info
->vinfo
;
6670 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6673 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6677 /* Is vectorizable load? */
6678 if (!is_gimple_assign (stmt
))
6681 scalar_dest
= gimple_assign_lhs (stmt
);
6682 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6685 code
= gimple_assign_rhs_code (stmt
);
6686 if (code
!= ARRAY_REF
6687 && code
!= BIT_FIELD_REF
6688 && code
!= INDIRECT_REF
6689 && code
!= COMPONENT_REF
6690 && code
!= IMAGPART_EXPR
6691 && code
!= REALPART_EXPR
6693 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6696 if (!STMT_VINFO_DATA_REF (stmt_info
))
6699 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6700 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6704 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6705 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6706 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6711 /* Multiple types in SLP are handled by creating the appropriate number of
6712 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6717 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6719 gcc_assert (ncopies
>= 1);
6721 /* FORNOW. This restriction should be relaxed. */
6722 if (nested_in_vect_loop
&& ncopies
> 1)
6724 if (dump_enabled_p ())
6725 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6726 "multiple types in nested loop.\n");
6730 /* Invalidate assumptions made by dependence analysis when vectorization
6731 on the unrolled body effectively re-orders stmts. */
6733 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6734 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6735 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6737 if (dump_enabled_p ())
6738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6739 "cannot perform implicit CSE when unrolling "
6740 "with negative dependence distance\n");
6744 elem_type
= TREE_TYPE (vectype
);
6745 mode
= TYPE_MODE (vectype
);
6747 /* FORNOW. In some cases can vectorize even if data-type not supported
6748 (e.g. - data copies). */
6749 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6751 if (dump_enabled_p ())
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6753 "Aligned load, but unsupported type.\n");
6757 /* Check if the load is a part of an interleaving chain. */
6758 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6760 grouped_load
= true;
6762 gcc_assert (!nested_in_vect_loop
);
6763 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6765 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6766 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6768 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6771 /* Invalidate assumptions made by dependence analysis when vectorization
6772 on the unrolled body effectively re-orders stmts. */
6773 if (!PURE_SLP_STMT (stmt_info
)
6774 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6775 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6776 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6778 if (dump_enabled_p ())
6779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6780 "cannot perform implicit CSE when performing "
6781 "group loads with negative dependence distance\n");
6785 /* Similarly when the stmt is a load that is both part of a SLP
6786 instance and a loop vectorized stmt via the same-dr mechanism
6787 we have to give up. */
6788 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6789 && (STMT_SLP_TYPE (stmt_info
)
6790 != STMT_SLP_TYPE (vinfo_for_stmt
6791 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6793 if (dump_enabled_p ())
6794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6795 "conflicting SLP types for CSEd load\n");
6800 vect_memory_access_type memory_access_type
;
6801 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6802 &memory_access_type
, &gs_info
))
6805 if (!vec_stmt
) /* transformation not required. */
6808 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6809 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6810 /* The SLP costs are calculated during SLP analysis. */
6811 if (!PURE_SLP_STMT (stmt_info
))
6812 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6818 gcc_assert (memory_access_type
6819 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6821 if (dump_enabled_p ())
6822 dump_printf_loc (MSG_NOTE
, vect_location
,
6823 "transform load. ncopies = %d\n", ncopies
);
6827 ensure_base_align (dr
);
6829 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6831 tree vec_oprnd0
= NULL_TREE
, op
;
6832 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6833 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6834 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6835 edge pe
= loop_preheader_edge (loop
);
6838 enum { NARROW
, NONE
, WIDEN
} modifier
;
6839 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6841 if (nunits
== gather_off_nunits
)
6843 else if (nunits
== gather_off_nunits
/ 2)
6847 auto_vec_perm_indices
sel (gather_off_nunits
);
6848 for (i
= 0; i
< gather_off_nunits
; ++i
)
6849 sel
.quick_push (i
| nunits
);
6851 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6853 else if (nunits
== gather_off_nunits
* 2)
6857 auto_vec_perm_indices
sel (nunits
);
6858 for (i
= 0; i
< nunits
; ++i
)
6859 sel
.quick_push (i
< gather_off_nunits
6860 ? i
: i
+ nunits
- gather_off_nunits
);
6862 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6868 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6869 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6870 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6871 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6872 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6873 scaletype
= TREE_VALUE (arglist
);
6874 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6876 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6878 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6879 if (!is_gimple_min_invariant (ptr
))
6881 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6882 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6883 gcc_assert (!new_bb
);
6886 /* Currently we support only unconditional gather loads,
6887 so mask should be all ones. */
6888 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6889 mask
= build_int_cst (masktype
, -1);
6890 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6892 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6893 mask
= build_vector_from_val (masktype
, mask
);
6894 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6896 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6900 for (j
= 0; j
< 6; ++j
)
6902 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6903 mask
= build_real (TREE_TYPE (masktype
), r
);
6904 mask
= build_vector_from_val (masktype
, mask
);
6905 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6910 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6912 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6913 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6914 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6918 for (j
= 0; j
< 6; ++j
)
6920 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6921 merge
= build_real (TREE_TYPE (rettype
), r
);
6925 merge
= build_vector_from_val (rettype
, merge
);
6926 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6928 prev_stmt_info
= NULL
;
6929 for (j
= 0; j
< ncopies
; ++j
)
6931 if (modifier
== WIDEN
&& (j
& 1))
6932 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6933 perm_mask
, stmt
, gsi
);
6936 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6939 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6941 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6943 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6944 == TYPE_VECTOR_SUBPARTS (idxtype
));
6945 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6946 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6948 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6949 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6954 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6956 if (!useless_type_conversion_p (vectype
, rettype
))
6958 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6959 == TYPE_VECTOR_SUBPARTS (rettype
));
6960 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6961 gimple_call_set_lhs (new_stmt
, op
);
6962 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6963 var
= make_ssa_name (vec_dest
);
6964 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6966 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6970 var
= make_ssa_name (vec_dest
, new_stmt
);
6971 gimple_call_set_lhs (new_stmt
, var
);
6974 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6976 if (modifier
== NARROW
)
6983 var
= permute_vec_elements (prev_res
, var
,
6984 perm_mask
, stmt
, gsi
);
6985 new_stmt
= SSA_NAME_DEF_STMT (var
);
6988 if (prev_stmt_info
== NULL
)
6989 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6991 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6992 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6997 if (memory_access_type
== VMAT_ELEMENTWISE
6998 || memory_access_type
== VMAT_STRIDED_SLP
)
7000 gimple_stmt_iterator incr_gsi
;
7006 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7007 gimple_seq stmts
= NULL
;
7008 tree stride_base
, stride_step
, alias_off
;
7010 gcc_assert (!nested_in_vect_loop
);
7012 if (slp
&& grouped_load
)
7014 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7015 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7016 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7017 ref_type
= get_group_alias_ptr_type (first_stmt
);
7024 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7028 = fold_build_pointer_plus
7029 (DR_BASE_ADDRESS (first_dr
),
7030 size_binop (PLUS_EXPR
,
7031 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7032 convert_to_ptrofftype (DR_INIT (first_dr
))));
7033 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7035 /* For a load with loop-invariant (but other than power-of-2)
7036 stride (i.e. not a grouped access) like so:
7038 for (i = 0; i < n; i += stride)
7041 we generate a new induction variable and new accesses to
7042 form a new vector (or vectors, depending on ncopies):
7044 for (j = 0; ; j += VF*stride)
7046 tmp2 = array[j + stride];
7048 vectemp = {tmp1, tmp2, ...}
7051 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7052 build_int_cst (TREE_TYPE (stride_step
), vf
));
7054 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7056 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7057 loop
, &incr_gsi
, insert_after
,
7059 incr
= gsi_stmt (incr_gsi
);
7060 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7062 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7063 &stmts
, true, NULL_TREE
);
7065 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7067 prev_stmt_info
= NULL
;
7068 running_off
= offvar
;
7069 alias_off
= build_int_cst (ref_type
, 0);
7070 int nloads
= nunits
;
7072 tree ltype
= TREE_TYPE (vectype
);
7073 tree lvectype
= vectype
;
7074 auto_vec
<tree
> dr_chain
;
7075 if (memory_access_type
== VMAT_STRIDED_SLP
)
7077 if (group_size
< nunits
)
7079 /* First check if vec_init optab supports construction from
7080 vector elts directly. */
7081 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7083 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7084 && VECTOR_MODE_P (vmode
)
7085 && (convert_optab_handler (vec_init_optab
,
7086 TYPE_MODE (vectype
), vmode
)
7087 != CODE_FOR_nothing
))
7089 nloads
= nunits
/ group_size
;
7091 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7095 /* Otherwise avoid emitting a constructor of vector elements
7096 by performing the loads using an integer type of the same
7097 size, constructing a vector of those and then
7098 re-interpreting it as the original vector type.
7099 This avoids a huge runtime penalty due to the general
7100 inability to perform store forwarding from smaller stores
7101 to a larger load. */
7103 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7104 elmode
= int_mode_for_size (lsize
, 0).require ();
7105 /* If we can't construct such a vector fall back to
7106 element loads of the original vector type. */
7107 if (mode_for_vector (elmode
,
7108 nunits
/ group_size
).exists (&vmode
)
7109 && VECTOR_MODE_P (vmode
)
7110 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7111 != CODE_FOR_nothing
))
7113 nloads
= nunits
/ group_size
;
7115 ltype
= build_nonstandard_integer_type (lsize
, 1);
7116 lvectype
= build_vector_type (ltype
, nloads
);
7126 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7130 /* For SLP permutation support we need to load the whole group,
7131 not only the number of vector stmts the permutation result
7135 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7136 dr_chain
.create (ncopies
);
7139 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7142 unsigned HOST_WIDE_INT
7143 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7144 for (j
= 0; j
< ncopies
; j
++)
7147 vec_alloc (v
, nloads
);
7148 for (i
= 0; i
< nloads
; i
++)
7150 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7152 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7153 build2 (MEM_REF
, ltype
,
7154 running_off
, this_off
));
7155 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7157 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7158 gimple_assign_lhs (new_stmt
));
7162 || group_el
== group_size
)
7164 tree newoff
= copy_ssa_name (running_off
);
7165 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7166 running_off
, stride_step
);
7167 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7169 running_off
= newoff
;
7175 tree vec_inv
= build_constructor (lvectype
, v
);
7176 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7177 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7178 if (lvectype
!= vectype
)
7180 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7182 build1 (VIEW_CONVERT_EXPR
,
7183 vectype
, new_temp
));
7184 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7191 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7193 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7198 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7200 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7201 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7207 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7208 slp_node_instance
, false, &n_perms
);
7215 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7216 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7217 /* For SLP vectorization we directly vectorize a subchain
7218 without permutation. */
7219 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7220 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7221 /* For BB vectorization always use the first stmt to base
7222 the data ref pointer on. */
7224 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7226 /* Check if the chain of loads is already vectorized. */
7227 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7228 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7229 ??? But we can only do so if there is exactly one
7230 as we have no way to get at the rest. Leave the CSE
7232 ??? With the group load eventually participating
7233 in multiple different permutations (having multiple
7234 slp nodes which refer to the same group) the CSE
7235 is even wrong code. See PR56270. */
7238 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7241 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7244 /* VEC_NUM is the number of vect stmts to be created for this group. */
7247 grouped_load
= false;
7248 /* For SLP permutation support we need to load the whole group,
7249 not only the number of vector stmts the permutation result
7253 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7254 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7258 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7260 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7264 vec_num
= group_size
;
7266 ref_type
= get_group_alias_ptr_type (first_stmt
);
7272 group_size
= vec_num
= 1;
7274 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7277 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7278 gcc_assert (alignment_support_scheme
);
7279 /* Targets with load-lane instructions must not require explicit
7281 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7282 || alignment_support_scheme
== dr_aligned
7283 || alignment_support_scheme
== dr_unaligned_supported
);
7285 /* In case the vectorization factor (VF) is bigger than the number
7286 of elements that we can fit in a vectype (nunits), we have to generate
7287 more than one vector stmt - i.e - we need to "unroll" the
7288 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7289 from one copy of the vector stmt to the next, in the field
7290 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7291 stages to find the correct vector defs to be used when vectorizing
7292 stmts that use the defs of the current stmt. The example below
7293 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7294 need to create 4 vectorized stmts):
7296 before vectorization:
7297 RELATED_STMT VEC_STMT
7301 step 1: vectorize stmt S1:
7302 We first create the vector stmt VS1_0, and, as usual, record a
7303 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7304 Next, we create the vector stmt VS1_1, and record a pointer to
7305 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7306 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7308 RELATED_STMT VEC_STMT
7309 VS1_0: vx0 = memref0 VS1_1 -
7310 VS1_1: vx1 = memref1 VS1_2 -
7311 VS1_2: vx2 = memref2 VS1_3 -
7312 VS1_3: vx3 = memref3 - -
7313 S1: x = load - VS1_0
7316 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7317 information we recorded in RELATED_STMT field is used to vectorize
7320 /* In case of interleaving (non-unit grouped access):
7327 Vectorized loads are created in the order of memory accesses
7328 starting from the access of the first stmt of the chain:
7331 VS2: vx1 = &base + vec_size*1
7332 VS3: vx3 = &base + vec_size*2
7333 VS4: vx4 = &base + vec_size*3
7335 Then permutation statements are generated:
7337 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7338 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7341 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7342 (the order of the data-refs in the output of vect_permute_load_chain
7343 corresponds to the order of scalar stmts in the interleaving chain - see
7344 the documentation of vect_permute_load_chain()).
7345 The generation of permutation stmts and recording them in
7346 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7348 In case of both multiple types and interleaving, the vector loads and
7349 permutation stmts above are created for every copy. The result vector
7350 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7351 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7353 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7354 on a target that supports unaligned accesses (dr_unaligned_supported)
7355 we generate the following code:
7359 p = p + indx * vectype_size;
7364 Otherwise, the data reference is potentially unaligned on a target that
7365 does not support unaligned accesses (dr_explicit_realign_optimized) -
7366 then generate the following code, in which the data in each iteration is
7367 obtained by two vector loads, one from the previous iteration, and one
7368 from the current iteration:
7370 msq_init = *(floor(p1))
7371 p2 = initial_addr + VS - 1;
7372 realignment_token = call target_builtin;
7375 p2 = p2 + indx * vectype_size
7377 vec_dest = realign_load (msq, lsq, realignment_token)
7382 /* If the misalignment remains the same throughout the execution of the
7383 loop, we can create the init_addr and permutation mask at the loop
7384 preheader. Otherwise, it needs to be created inside the loop.
7385 This can only occur when vectorizing memory accesses in the inner-loop
7386 nested within an outer-loop that is being vectorized. */
7388 if (nested_in_vect_loop
7389 && (DR_STEP_ALIGNMENT (dr
) % GET_MODE_SIZE (TYPE_MODE (vectype
))) != 0)
7391 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7392 compute_in_loop
= true;
7395 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7396 || alignment_support_scheme
== dr_explicit_realign
)
7397 && !compute_in_loop
)
7399 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7400 alignment_support_scheme
, NULL_TREE
,
7402 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7404 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7405 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7412 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7413 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7415 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7416 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7418 aggr_type
= vectype
;
7420 prev_stmt_info
= NULL
;
7422 for (j
= 0; j
< ncopies
; j
++)
7424 /* 1. Create the vector or array pointer update chain. */
7427 bool simd_lane_access_p
7428 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7429 if (simd_lane_access_p
7430 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7431 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7432 && integer_zerop (DR_OFFSET (first_dr
))
7433 && integer_zerop (DR_INIT (first_dr
))
7434 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7435 get_alias_set (TREE_TYPE (ref_type
)))
7436 && (alignment_support_scheme
== dr_aligned
7437 || alignment_support_scheme
== dr_unaligned_supported
))
7439 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7440 dataref_offset
= build_int_cst (ref_type
, 0);
7443 else if (first_stmt_for_drptr
7444 && first_stmt
!= first_stmt_for_drptr
)
7447 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7448 at_loop
, offset
, &dummy
, gsi
,
7449 &ptr_incr
, simd_lane_access_p
,
7450 &inv_p
, byte_offset
);
7451 /* Adjust the pointer by the difference to first_stmt. */
7452 data_reference_p ptrdr
7453 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7454 tree diff
= fold_convert (sizetype
,
7455 size_binop (MINUS_EXPR
,
7458 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7463 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7464 offset
, &dummy
, gsi
, &ptr_incr
,
7465 simd_lane_access_p
, &inv_p
,
7468 else if (dataref_offset
)
7469 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7470 TYPE_SIZE_UNIT (aggr_type
));
7472 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7473 TYPE_SIZE_UNIT (aggr_type
));
7475 if (grouped_load
|| slp_perm
)
7476 dr_chain
.create (vec_num
);
7478 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7482 vec_array
= create_vector_array (vectype
, vec_num
);
7485 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7486 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7487 gcall
*call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1,
7489 gimple_call_set_lhs (call
, vec_array
);
7490 gimple_call_set_nothrow (call
, true);
7492 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7494 /* Extract each vector into an SSA_NAME. */
7495 for (i
= 0; i
< vec_num
; i
++)
7497 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7499 dr_chain
.quick_push (new_temp
);
7502 /* Record the mapping between SSA_NAMEs and statements. */
7503 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7507 for (i
= 0; i
< vec_num
; i
++)
7510 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7513 /* 2. Create the vector-load in the loop. */
7514 switch (alignment_support_scheme
)
7517 case dr_unaligned_supported
:
7519 unsigned int align
, misalign
;
7522 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7525 : build_int_cst (ref_type
, 0));
7526 align
= DR_TARGET_ALIGNMENT (dr
);
7527 if (alignment_support_scheme
== dr_aligned
)
7529 gcc_assert (aligned_access_p (first_dr
));
7532 else if (DR_MISALIGNMENT (first_dr
) == -1)
7534 align
= dr_alignment (vect_dr_behavior (first_dr
));
7536 TREE_TYPE (data_ref
)
7537 = build_aligned_type (TREE_TYPE (data_ref
),
7538 align
* BITS_PER_UNIT
);
7542 TREE_TYPE (data_ref
)
7543 = build_aligned_type (TREE_TYPE (data_ref
),
7544 TYPE_ALIGN (elem_type
));
7545 misalign
= DR_MISALIGNMENT (first_dr
);
7547 if (dataref_offset
== NULL_TREE
7548 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7549 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7553 case dr_explicit_realign
:
7557 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7559 if (compute_in_loop
)
7560 msq
= vect_setup_realignment (first_stmt
, gsi
,
7562 dr_explicit_realign
,
7565 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7566 ptr
= copy_ssa_name (dataref_ptr
);
7568 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7569 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7570 new_stmt
= gimple_build_assign
7571 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7573 (TREE_TYPE (dataref_ptr
),
7574 -(HOST_WIDE_INT
) align
));
7575 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7577 = build2 (MEM_REF
, vectype
, ptr
,
7578 build_int_cst (ref_type
, 0));
7579 vec_dest
= vect_create_destination_var (scalar_dest
,
7581 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7582 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7583 gimple_assign_set_lhs (new_stmt
, new_temp
);
7584 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7585 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7586 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7589 bump
= size_binop (MULT_EXPR
, vs
,
7590 TYPE_SIZE_UNIT (elem_type
));
7591 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7592 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7593 new_stmt
= gimple_build_assign
7594 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7596 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7597 ptr
= copy_ssa_name (ptr
, new_stmt
);
7598 gimple_assign_set_lhs (new_stmt
, ptr
);
7599 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7601 = build2 (MEM_REF
, vectype
, ptr
,
7602 build_int_cst (ref_type
, 0));
7605 case dr_explicit_realign_optimized
:
7607 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7608 new_temp
= copy_ssa_name (dataref_ptr
);
7610 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7611 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7612 new_stmt
= gimple_build_assign
7613 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7614 build_int_cst (TREE_TYPE (dataref_ptr
),
7615 -(HOST_WIDE_INT
) align
));
7616 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7618 = build2 (MEM_REF
, vectype
, new_temp
,
7619 build_int_cst (ref_type
, 0));
7625 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7626 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7627 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7628 gimple_assign_set_lhs (new_stmt
, new_temp
);
7629 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7631 /* 3. Handle explicit realignment if necessary/supported.
7633 vec_dest = realign_load (msq, lsq, realignment_token) */
7634 if (alignment_support_scheme
== dr_explicit_realign_optimized
7635 || alignment_support_scheme
== dr_explicit_realign
)
7637 lsq
= gimple_assign_lhs (new_stmt
);
7638 if (!realignment_token
)
7639 realignment_token
= dataref_ptr
;
7640 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7641 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7642 msq
, lsq
, realignment_token
);
7643 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7644 gimple_assign_set_lhs (new_stmt
, new_temp
);
7645 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7647 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7650 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7651 add_phi_arg (phi
, lsq
,
7652 loop_latch_edge (containing_loop
),
7658 /* 4. Handle invariant-load. */
7659 if (inv_p
&& !bb_vinfo
)
7661 gcc_assert (!grouped_load
);
7662 /* If we have versioned for aliasing or the loop doesn't
7663 have any data dependencies that would preclude this,
7664 then we are sure this is a loop invariant load and
7665 thus we can insert it on the preheader edge. */
7666 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7667 && !nested_in_vect_loop
7668 && hoist_defs_of_uses (stmt
, loop
))
7670 if (dump_enabled_p ())
7672 dump_printf_loc (MSG_NOTE
, vect_location
,
7673 "hoisting out of the vectorized "
7675 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7677 tree tem
= copy_ssa_name (scalar_dest
);
7678 gsi_insert_on_edge_immediate
7679 (loop_preheader_edge (loop
),
7680 gimple_build_assign (tem
,
7682 (gimple_assign_rhs1 (stmt
))));
7683 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7684 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7685 set_vinfo_for_stmt (new_stmt
,
7686 new_stmt_vec_info (new_stmt
, vinfo
));
7690 gimple_stmt_iterator gsi2
= *gsi
;
7692 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7694 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7698 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7700 tree perm_mask
= perm_mask_for_reverse (vectype
);
7701 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7702 perm_mask
, stmt
, gsi
);
7703 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7706 /* Collect vector loads and later create their permutation in
7707 vect_transform_grouped_load (). */
7708 if (grouped_load
|| slp_perm
)
7709 dr_chain
.quick_push (new_temp
);
7711 /* Store vector loads in the corresponding SLP_NODE. */
7712 if (slp
&& !slp_perm
)
7713 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7715 /* With SLP permutation we load the gaps as well, without
7716 we need to skip the gaps after we manage to fully load
7717 all elements. group_gap_adj is GROUP_SIZE here. */
7718 group_elt
+= nunits
;
7719 if (group_gap_adj
!= 0 && ! slp_perm
7720 && group_elt
== group_size
- group_gap_adj
)
7722 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7724 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7725 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7730 /* Bump the vector pointer to account for a gap or for excess
7731 elements loaded for a permuted SLP load. */
7732 if (group_gap_adj
!= 0 && slp_perm
)
7734 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7736 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7737 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7742 if (slp
&& !slp_perm
)
7748 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7749 slp_node_instance
, false,
7752 dr_chain
.release ();
7760 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7761 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7762 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7767 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7769 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7770 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7773 dr_chain
.release ();
7779 /* Function vect_is_simple_cond.
7782 LOOP - the loop that is being vectorized.
7783 COND - Condition that is checked for simple use.
7786 *COMP_VECTYPE - the vector type for the comparison.
7787 *DTS - The def types for the arguments of the comparison
7789 Returns whether a COND can be vectorized. Checks whether
7790 condition operands are supportable using vect_is_simple_use. */
/* Decide whether COND (the controlling expression of a COND_EXPR being
   vectorized) is simple enough to vectorize, and compute the vector type
   to use for the comparison.

   NOTE(review): this chunk is a garbled extraction -- several original
   source lines (e.g. the "static bool" return type, the early
   "return false;" lines and the final "return true;") are missing, and
   single statements are split across physical lines.  The code below is
   left byte-for-byte as found; only comments have been added.  */
7793 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
7794 tree
*comp_vectype
, enum vect_def_type
*dts
)
/* VECTYPE1/VECTYPE2 collect the vector types of the two comparison
   operands; either may stay NULL_TREE (e.g. for constant operands).  */
7797 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Case 1: COND is already a scalar boolean SSA name (a mask) rather than
   a comparison.  It must be a simple use whose vector type is a
   vector-boolean (mask) type.  */
7800 if (TREE_CODE (cond
) == SSA_NAME
7801 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7803 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7804 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7805 &dts
[0], comp_vectype
)
7807 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
/* Case 2: otherwise COND must be a comparison (tcc_comparison class).  */
7812 if (!COMPARISON_CLASS_P (cond
))
/* Split the comparison into its two operands.  */
7815 lhs
= TREE_OPERAND (cond
, 0);
7816 rhs
= TREE_OPERAND (cond
, 1);
/* LHS: either an SSA name that is a simple use (def type recorded in
   DTS[0], vector type in VECTYPE1), or a numeric constant.  */
7818 if (TREE_CODE (lhs
) == SSA_NAME
)
7820 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7821 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
7824 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
7825 || TREE_CODE (lhs
) == FIXED_CST
)
7826 dts
[0] = vect_constant_def
;
/* RHS: same checks as for LHS, recording into DTS[1]/VECTYPE2.  */
7830 if (TREE_CODE (rhs
) == SSA_NAME
)
7832 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7833 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
7836 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
7837 || TREE_CODE (rhs
) == FIXED_CST
)
7838 dts
[1] = vect_constant_def
;
/* When both operand vector types are known they must agree in number of
   elements, otherwise the comparison cannot be vectorized.  */
7842 if (vectype1
&& vectype2
7843 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
/* Report whichever operand vector type is known; may end up NULL_TREE
   when both operands were constants -- caller must cope.  */
7846 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7850 /* vectorizable_condition.
7852 Check if STMT is conditional modify expression that can be vectorized.
7853 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7854 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7857 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7858 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7859 else clause if it is 2).
7861 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7864 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7865 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7868 tree scalar_dest
= NULL_TREE
;
7869 tree vec_dest
= NULL_TREE
;
7870 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7871 tree then_clause
, else_clause
;
7872 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7873 tree comp_vectype
= NULL_TREE
;
7874 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7875 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7878 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7879 enum vect_def_type dts
[4]
7880 = {vect_unknown_def_type
, vect_unknown_def_type
,
7881 vect_unknown_def_type
, vect_unknown_def_type
};
7884 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7885 stmt_vec_info prev_stmt_info
= NULL
;
7887 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7888 vec
<tree
> vec_oprnds0
= vNULL
;
7889 vec
<tree
> vec_oprnds1
= vNULL
;
7890 vec
<tree
> vec_oprnds2
= vNULL
;
7891 vec
<tree
> vec_oprnds3
= vNULL
;
7893 bool masked
= false;
7895 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7898 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7900 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7903 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7904 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7908 /* FORNOW: not yet supported. */
7909 if (STMT_VINFO_LIVE_P (stmt_info
))
7911 if (dump_enabled_p ())
7912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7913 "value used after loop.\n");
7918 /* Is vectorizable conditional operation? */
7919 if (!is_gimple_assign (stmt
))
7922 code
= gimple_assign_rhs_code (stmt
);
7924 if (code
!= COND_EXPR
)
7927 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7928 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7933 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7935 gcc_assert (ncopies
>= 1);
7936 if (reduc_index
&& ncopies
> 1)
7937 return false; /* FORNOW */
7939 cond_expr
= gimple_assign_rhs1 (stmt
);
7940 then_clause
= gimple_assign_rhs2 (stmt
);
7941 else_clause
= gimple_assign_rhs3 (stmt
);
7943 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
7944 &comp_vectype
, &dts
[0])
7949 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
7952 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
7956 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7959 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7962 masked
= !COMPARISON_CLASS_P (cond_expr
);
7963 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7965 if (vec_cmp_type
== NULL_TREE
)
7968 cond_code
= TREE_CODE (cond_expr
);
7971 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
7972 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
7975 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7977 /* Boolean values may have another representation in vectors
7978 and therefore we prefer bit operations over comparison for
7979 them (which also works for scalar masks). We store opcodes
7980 to use in bitop1 and bitop2. Statement is vectorized as
7981 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7982 depending on bitop1 and bitop2 arity. */
7986 bitop1
= BIT_NOT_EXPR
;
7987 bitop2
= BIT_AND_EXPR
;
7990 bitop1
= BIT_NOT_EXPR
;
7991 bitop2
= BIT_IOR_EXPR
;
7994 bitop1
= BIT_NOT_EXPR
;
7995 bitop2
= BIT_AND_EXPR
;
7996 std::swap (cond_expr0
, cond_expr1
);
7999 bitop1
= BIT_NOT_EXPR
;
8000 bitop2
= BIT_IOR_EXPR
;
8001 std::swap (cond_expr0
, cond_expr1
);
8004 bitop1
= BIT_XOR_EXPR
;
8007 bitop1
= BIT_XOR_EXPR
;
8008 bitop2
= BIT_NOT_EXPR
;
8013 cond_code
= SSA_NAME
;
8018 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8019 if (bitop1
!= NOP_EXPR
)
8021 machine_mode mode
= TYPE_MODE (comp_vectype
);
8024 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8025 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8028 if (bitop2
!= NOP_EXPR
)
8030 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8032 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8036 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8039 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
8049 vec_oprnds0
.create (1);
8050 vec_oprnds1
.create (1);
8051 vec_oprnds2
.create (1);
8052 vec_oprnds3
.create (1);
8056 scalar_dest
= gimple_assign_lhs (stmt
);
8057 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8059 /* Handle cond expr. */
8060 for (j
= 0; j
< ncopies
; j
++)
8062 gassign
*new_stmt
= NULL
;
8067 auto_vec
<tree
, 4> ops
;
8068 auto_vec
<vec
<tree
>, 4> vec_defs
;
8071 ops
.safe_push (cond_expr
);
8074 ops
.safe_push (cond_expr0
);
8075 ops
.safe_push (cond_expr1
);
8077 ops
.safe_push (then_clause
);
8078 ops
.safe_push (else_clause
);
8079 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8080 vec_oprnds3
= vec_defs
.pop ();
8081 vec_oprnds2
= vec_defs
.pop ();
8083 vec_oprnds1
= vec_defs
.pop ();
8084 vec_oprnds0
= vec_defs
.pop ();
8092 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8094 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8100 = vect_get_vec_def_for_operand (cond_expr0
,
8101 stmt
, comp_vectype
);
8102 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8105 = vect_get_vec_def_for_operand (cond_expr1
,
8106 stmt
, comp_vectype
);
8107 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8109 if (reduc_index
== 1)
8110 vec_then_clause
= reduc_def
;
8113 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8115 vect_is_simple_use (then_clause
, loop_vinfo
,
8118 if (reduc_index
== 2)
8119 vec_else_clause
= reduc_def
;
8122 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8124 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8131 = vect_get_vec_def_for_stmt_copy (dts
[0],
8132 vec_oprnds0
.pop ());
8135 = vect_get_vec_def_for_stmt_copy (dts
[1],
8136 vec_oprnds1
.pop ());
8138 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8139 vec_oprnds2
.pop ());
8140 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8141 vec_oprnds3
.pop ());
8146 vec_oprnds0
.quick_push (vec_cond_lhs
);
8148 vec_oprnds1
.quick_push (vec_cond_rhs
);
8149 vec_oprnds2
.quick_push (vec_then_clause
);
8150 vec_oprnds3
.quick_push (vec_else_clause
);
8153 /* Arguments are ready. Create the new vector stmt. */
8154 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8156 vec_then_clause
= vec_oprnds2
[i
];
8157 vec_else_clause
= vec_oprnds3
[i
];
8160 vec_compare
= vec_cond_lhs
;
8163 vec_cond_rhs
= vec_oprnds1
[i
];
8164 if (bitop1
== NOP_EXPR
)
8165 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8166 vec_cond_lhs
, vec_cond_rhs
);
8169 new_temp
= make_ssa_name (vec_cmp_type
);
8170 if (bitop1
== BIT_NOT_EXPR
)
8171 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8175 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8177 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8178 if (bitop2
== NOP_EXPR
)
8179 vec_compare
= new_temp
;
8180 else if (bitop2
== BIT_NOT_EXPR
)
8182 /* Instead of doing ~x ? y : z do x ? z : y. */
8183 vec_compare
= new_temp
;
8184 std::swap (vec_then_clause
, vec_else_clause
);
8188 vec_compare
= make_ssa_name (vec_cmp_type
);
8190 = gimple_build_assign (vec_compare
, bitop2
,
8191 vec_cond_lhs
, new_temp
);
8192 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8196 new_temp
= make_ssa_name (vec_dest
);
8197 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8198 vec_compare
, vec_then_clause
,
8200 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8202 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8209 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8211 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8213 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8216 vec_oprnds0
.release ();
8217 vec_oprnds1
.release ();
8218 vec_oprnds2
.release ();
8219 vec_oprnds3
.release ();
8224 /* vectorizable_comparison.
8226 Check if STMT is comparison expression that can be vectorized.
8227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8228 comparison, put it in VEC_STMT, and insert it at GSI.
8230 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8233 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8234 gimple
**vec_stmt
, tree reduc_def
,
8237 tree lhs
, rhs1
, rhs2
;
8238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8239 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8240 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8241 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8243 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8244 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8248 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8249 stmt_vec_info prev_stmt_info
= NULL
;
8251 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8252 vec
<tree
> vec_oprnds0
= vNULL
;
8253 vec
<tree
> vec_oprnds1
= vNULL
;
8258 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8261 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8264 mask_type
= vectype
;
8265 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8270 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8272 gcc_assert (ncopies
>= 1);
8273 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8274 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8278 if (STMT_VINFO_LIVE_P (stmt_info
))
8280 if (dump_enabled_p ())
8281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8282 "value used after loop.\n");
8286 if (!is_gimple_assign (stmt
))
8289 code
= gimple_assign_rhs_code (stmt
);
8291 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8294 rhs1
= gimple_assign_rhs1 (stmt
);
8295 rhs2
= gimple_assign_rhs2 (stmt
);
8297 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8298 &dts
[0], &vectype1
))
8301 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8302 &dts
[1], &vectype2
))
8305 if (vectype1
&& vectype2
8306 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
8309 vectype
= vectype1
? vectype1
: vectype2
;
8311 /* Invariant comparison. */
8314 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8315 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8318 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8321 /* Can't compare mask and non-mask types. */
8322 if (vectype1
&& vectype2
8323 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8326 /* Boolean values may have another representation in vectors
8327 and therefore we prefer bit operations over comparison for
8328 them (which also works for scalar masks). We store opcodes
8329 to use in bitop1 and bitop2. Statement is vectorized as
8330 BITOP2 (rhs1 BITOP1 rhs2) or
8331 rhs1 BITOP2 (BITOP1 rhs2)
8332 depending on bitop1 and bitop2 arity. */
8333 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
8335 if (code
== GT_EXPR
)
8337 bitop1
= BIT_NOT_EXPR
;
8338 bitop2
= BIT_AND_EXPR
;
8340 else if (code
== GE_EXPR
)
8342 bitop1
= BIT_NOT_EXPR
;
8343 bitop2
= BIT_IOR_EXPR
;
8345 else if (code
== LT_EXPR
)
8347 bitop1
= BIT_NOT_EXPR
;
8348 bitop2
= BIT_AND_EXPR
;
8349 std::swap (rhs1
, rhs2
);
8350 std::swap (dts
[0], dts
[1]);
8352 else if (code
== LE_EXPR
)
8354 bitop1
= BIT_NOT_EXPR
;
8355 bitop2
= BIT_IOR_EXPR
;
8356 std::swap (rhs1
, rhs2
);
8357 std::swap (dts
[0], dts
[1]);
8361 bitop1
= BIT_XOR_EXPR
;
8362 if (code
== EQ_EXPR
)
8363 bitop2
= BIT_NOT_EXPR
;
8369 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
8370 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
8371 dts
, ndts
, NULL
, NULL
);
8372 if (bitop1
== NOP_EXPR
)
8373 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
8376 machine_mode mode
= TYPE_MODE (vectype
);
8379 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
8380 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8383 if (bitop2
!= NOP_EXPR
)
8385 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8386 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8396 vec_oprnds0
.create (1);
8397 vec_oprnds1
.create (1);
8401 lhs
= gimple_assign_lhs (stmt
);
8402 mask
= vect_create_destination_var (lhs
, mask_type
);
8404 /* Handle cmp expr. */
8405 for (j
= 0; j
< ncopies
; j
++)
8407 gassign
*new_stmt
= NULL
;
8412 auto_vec
<tree
, 2> ops
;
8413 auto_vec
<vec
<tree
>, 2> vec_defs
;
8415 ops
.safe_push (rhs1
);
8416 ops
.safe_push (rhs2
);
8417 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8418 vec_oprnds1
= vec_defs
.pop ();
8419 vec_oprnds0
= vec_defs
.pop ();
8423 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8424 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8429 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8430 vec_oprnds0
.pop ());
8431 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8432 vec_oprnds1
.pop ());
8437 vec_oprnds0
.quick_push (vec_rhs1
);
8438 vec_oprnds1
.quick_push (vec_rhs2
);
8441 /* Arguments are ready. Create the new vector stmt. */
8442 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8444 vec_rhs2
= vec_oprnds1
[i
];
8446 new_temp
= make_ssa_name (mask
);
8447 if (bitop1
== NOP_EXPR
)
8449 new_stmt
= gimple_build_assign (new_temp
, code
,
8450 vec_rhs1
, vec_rhs2
);
8451 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8455 if (bitop1
== BIT_NOT_EXPR
)
8456 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8458 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8460 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8461 if (bitop2
!= NOP_EXPR
)
8463 tree res
= make_ssa_name (mask
);
8464 if (bitop2
== BIT_NOT_EXPR
)
8465 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8467 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8469 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8473 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8480 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8482 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8484 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8487 vec_oprnds0
.release ();
8488 vec_oprnds1
.release ();
8493 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8494 can handle all live statements in the node. Otherwise return true
8495 if STMT is not live or if vectorizable_live_operation can handle it.
8496 GSI and VEC_STMT are as for vectorizable_live_operation. */
8499 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8500 slp_tree slp_node
, gimple
**vec_stmt
)
8506 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8508 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8509 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8510 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8515 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
8516 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
))
8522 /* Make sure the statement is vectorizable. */
8525 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
8526 slp_instance node_instance
)
8528 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8529 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8530 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8532 gimple
*pattern_stmt
;
8533 gimple_seq pattern_def_seq
;
8535 if (dump_enabled_p ())
8537 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8538 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8541 if (gimple_has_volatile_ops (stmt
))
8543 if (dump_enabled_p ())
8544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8545 "not vectorized: stmt has volatile operands\n");
8550 /* Skip stmts that do not need to be vectorized. In loops this is expected
8552 - the COND_EXPR which is the loop exit condition
8553 - any LABEL_EXPRs in the loop
8554 - computations that are used only for array indexing or loop control.
8555 In basic blocks we only analyze statements that are a part of some SLP
8556 instance, therefore, all the statements are relevant.
8558 Pattern statement needs to be analyzed instead of the original statement
8559 if the original statement is not relevant. Otherwise, we analyze both
8560 statements. In basic blocks we are called from some SLP instance
8561 traversal, don't analyze pattern stmts instead, the pattern stmts
8562 already will be part of SLP instance. */
8564 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8565 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8566 && !STMT_VINFO_LIVE_P (stmt_info
))
8568 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8570 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8571 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8573 /* Analyze PATTERN_STMT instead of the original stmt. */
8574 stmt
= pattern_stmt
;
8575 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8576 if (dump_enabled_p ())
8578 dump_printf_loc (MSG_NOTE
, vect_location
,
8579 "==> examining pattern statement: ");
8580 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8585 if (dump_enabled_p ())
8586 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8591 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8594 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8595 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8597 /* Analyze PATTERN_STMT too. */
8598 if (dump_enabled_p ())
8600 dump_printf_loc (MSG_NOTE
, vect_location
,
8601 "==> examining pattern statement: ");
8602 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8605 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
8610 if (is_pattern_stmt_p (stmt_info
)
8612 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8614 gimple_stmt_iterator si
;
8616 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8618 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8619 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8620 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8622 /* Analyze def stmt of STMT if it's a pattern stmt. */
8623 if (dump_enabled_p ())
8625 dump_printf_loc (MSG_NOTE
, vect_location
,
8626 "==> examining pattern def statement: ");
8627 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8630 if (!vect_analyze_stmt (pattern_def_stmt
,
8631 need_to_vectorize
, node
, node_instance
))
8637 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8639 case vect_internal_def
:
8642 case vect_reduction_def
:
8643 case vect_nested_cycle
:
8644 gcc_assert (!bb_vinfo
8645 && (relevance
== vect_used_in_outer
8646 || relevance
== vect_used_in_outer_by_reduction
8647 || relevance
== vect_used_by_reduction
8648 || relevance
== vect_unused_in_scope
8649 || relevance
== vect_used_only_live
));
8652 case vect_induction_def
:
8653 gcc_assert (!bb_vinfo
);
8656 case vect_constant_def
:
8657 case vect_external_def
:
8658 case vect_unknown_def_type
:
8663 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8665 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8666 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8667 || (is_gimple_call (stmt
)
8668 && gimple_call_lhs (stmt
) == NULL_TREE
));
8669 *need_to_vectorize
= true;
8672 if (PURE_SLP_STMT (stmt_info
) && !node
)
8674 dump_printf_loc (MSG_NOTE
, vect_location
,
8675 "handled only by SLP analysis\n");
8681 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8682 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8683 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8684 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8685 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8686 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8687 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8688 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8689 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8690 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8691 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
)
8692 || vectorizable_induction (stmt
, NULL
, NULL
, node
)
8693 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8694 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8698 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8699 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8700 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8701 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8702 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8703 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8704 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8705 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8706 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8707 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8712 if (dump_enabled_p ())
8714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8715 "not vectorized: relevant stmt not ");
8716 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8717 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8726 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8727 need extra handling, except for vectorizable reductions. */
8728 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8729 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
))
8731 if (dump_enabled_p ())
8733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8734 "not vectorized: live stmt not supported: ");
8735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8745 /* Function vect_transform_stmt.
8747 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8750 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8751 bool *grouped_store
, slp_tree slp_node
,
8752 slp_instance slp_node_instance
)
8754 bool is_store
= false;
8755 gimple
*vec_stmt
= NULL
;
8756 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8759 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8760 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8762 switch (STMT_VINFO_TYPE (stmt_info
))
8764 case type_demotion_vec_info_type
:
8765 case type_promotion_vec_info_type
:
8766 case type_conversion_vec_info_type
:
8767 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8771 case induc_vec_info_type
:
8772 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
);
8776 case shift_vec_info_type
:
8777 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8781 case op_vec_info_type
:
8782 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8786 case assignment_vec_info_type
:
8787 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8791 case load_vec_info_type
:
8792 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8797 case store_vec_info_type
:
8798 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8800 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8802 /* In case of interleaving, the whole chain is vectorized when the
8803 last store in the chain is reached. Store stmts before the last
8804 one are skipped, and there vec_stmt_info shouldn't be freed
8806 *grouped_store
= true;
8807 if (STMT_VINFO_VEC_STMT (stmt_info
))
8814 case condition_vec_info_type
:
8815 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8819 case comparison_vec_info_type
:
8820 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8824 case call_vec_info_type
:
8825 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8826 stmt
= gsi_stmt (*gsi
);
8827 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
8831 case call_simd_clone_vec_info_type
:
8832 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8833 stmt
= gsi_stmt (*gsi
);
8836 case reduc_vec_info_type
:
8837 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
8843 if (!STMT_VINFO_LIVE_P (stmt_info
))
8845 if (dump_enabled_p ())
8846 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8847 "stmt not supported.\n");
8852 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8853 This would break hybrid SLP vectorization. */
8855 gcc_assert (!vec_stmt
8856 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8858 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8859 is being vectorized, but outside the immediately enclosing loop. */
8861 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8862 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8863 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8864 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8865 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8866 || STMT_VINFO_RELEVANT (stmt_info
) ==
8867 vect_used_in_outer_by_reduction
))
8869 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8870 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8871 imm_use_iterator imm_iter
;
8872 use_operand_p use_p
;
8876 if (dump_enabled_p ())
8877 dump_printf_loc (MSG_NOTE
, vect_location
,
8878 "Record the vdef for outer-loop vectorization.\n");
8880 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8881 (to be used when vectorizing outer-loop stmts that use the DEF of
8883 if (gimple_code (stmt
) == GIMPLE_PHI
)
8884 scalar_dest
= PHI_RESULT (stmt
);
8886 scalar_dest
= gimple_assign_lhs (stmt
);
8888 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8890 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8892 exit_phi
= USE_STMT (use_p
);
8893 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8898 /* Handle stmts whose DEF is used outside the loop-nest that is
8899 being vectorized. */
8900 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8902 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
);
8907 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8913 /* Remove a group of stores (for SLP or interleaving), free their
8917 vect_remove_stores (gimple
*first_stmt
)
8919 gimple
*next
= first_stmt
;
8921 gimple_stmt_iterator next_si
;
8925 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8927 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8928 if (is_pattern_stmt_p (stmt_info
))
8929 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8930 /* Free the attached stmt_vec_info and remove the stmt. */
8931 next_si
= gsi_for_stmt (next
);
8932 unlink_stmt_vdef (next
);
8933 gsi_remove (&next_si
, true);
8934 release_defs (next
);
8935 free_stmt_vec_info (next
);
8941 /* Function new_stmt_vec_info.
8943 Create and initialize a new stmt_vec_info struct for STMT. */
8946 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8949 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8951 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8952 STMT_VINFO_STMT (res
) = stmt
;
8954 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8955 STMT_VINFO_LIVE_P (res
) = false;
8956 STMT_VINFO_VECTYPE (res
) = NULL
;
8957 STMT_VINFO_VEC_STMT (res
) = NULL
;
8958 STMT_VINFO_VECTORIZABLE (res
) = true;
8959 STMT_VINFO_IN_PATTERN_P (res
) = false;
8960 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8961 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8962 STMT_VINFO_DATA_REF (res
) = NULL
;
8963 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8964 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
8966 if (gimple_code (stmt
) == GIMPLE_PHI
8967 && is_loop_header_bb_p (gimple_bb (stmt
)))
8968 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8970 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8972 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8973 STMT_SLP_TYPE (res
) = loop_vect
;
8974 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8976 GROUP_FIRST_ELEMENT (res
) = NULL
;
8977 GROUP_NEXT_ELEMENT (res
) = NULL
;
8978 GROUP_SIZE (res
) = 0;
8979 GROUP_STORE_COUNT (res
) = 0;
8980 GROUP_GAP (res
) = 0;
8981 GROUP_SAME_DR_STMT (res
) = NULL
;
8987 /* Create a hash table for stmt_vec_info. */
8990 init_stmt_vec_info_vec (void)
8992 gcc_assert (!stmt_vec_info_vec
.exists ());
8993 stmt_vec_info_vec
.create (50);
8997 /* Free hash table for stmt_vec_info. */
9000 free_stmt_vec_info_vec (void)
9004 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
9006 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9007 gcc_assert (stmt_vec_info_vec
.exists ());
9008 stmt_vec_info_vec
.release ();
9012 /* Free stmt vectorization related info. */
9015 free_stmt_vec_info (gimple
*stmt
)
9017 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9022 /* Check if this statement has a related "pattern stmt"
9023 (introduced by the vectorizer during the pattern recognition
9024 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9026 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9028 stmt_vec_info patt_info
9029 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9032 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
9033 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9034 gimple_set_bb (patt_stmt
, NULL
);
9035 tree lhs
= gimple_get_lhs (patt_stmt
);
9036 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9037 release_ssa_name (lhs
);
9040 gimple_stmt_iterator si
;
9041 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
9043 gimple
*seq_stmt
= gsi_stmt (si
);
9044 gimple_set_bb (seq_stmt
, NULL
);
9045 lhs
= gimple_get_lhs (seq_stmt
);
9046 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9047 release_ssa_name (lhs
);
9048 free_stmt_vec_info (seq_stmt
);
9051 free_stmt_vec_info (patt_stmt
);
9055 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9056 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9057 set_vinfo_for_stmt (stmt
, NULL
);
9062 /* Function get_vectype_for_scalar_type_and_size.
9064 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9068 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
9070 tree orig_scalar_type
= scalar_type
;
9071 scalar_mode inner_mode
;
9072 machine_mode simd_mode
;
9076 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9077 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9080 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9082 /* For vector types of elements whose mode precision doesn't
9083 match their types precision we use a element type of mode
9084 precision. The vectorization routines will have to make sure
9085 they support the proper result truncation/extension.
9086 We also make sure to build vector types with INTEGER_TYPE
9087 component type only. */
9088 if (INTEGRAL_TYPE_P (scalar_type
)
9089 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9090 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9091 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9092 TYPE_UNSIGNED (scalar_type
));
9094 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9095 When the component mode passes the above test simply use a type
9096 corresponding to that mode. The theory is that any use that
9097 would cause problems with this will disable vectorization anyway. */
9098 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9099 && !INTEGRAL_TYPE_P (scalar_type
))
9100 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9102 /* We can't build a vector type of elements with alignment bigger than
9104 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9105 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9106 TYPE_UNSIGNED (scalar_type
));
9108 /* If we felt back to using the mode fail if there was
9109 no scalar type for it. */
9110 if (scalar_type
== NULL_TREE
)
9113 /* If no size was supplied use the mode the target prefers. Otherwise
9114 lookup a vector mode of the specified size. */
9116 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9117 else if (!mode_for_vector (inner_mode
, size
/ nbytes
).exists (&simd_mode
))
9119 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
9120 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9124 vectype
= build_vector_type (scalar_type
, nunits
);
9126 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9127 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9130 /* Re-attach the address-space qualifier if we canonicalized the scalar
9132 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9133 return build_qualified_type
9134 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9139 unsigned int current_vector_size
;
9141 /* Function get_vectype_for_scalar_type.
9143 Returns the vector type corresponding to SCALAR_TYPE as supported
9147 get_vectype_for_scalar_type (tree scalar_type
)
9150 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9151 current_vector_size
);
9153 && current_vector_size
== 0)
9154 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9158 /* Function get_mask_type_for_scalar_type.
9160 Returns the mask type corresponding to a result of comparison
9161 of vectors of specified SCALAR_TYPE as supported by target. */
9164 get_mask_type_for_scalar_type (tree scalar_type
)
9166 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9171 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9172 current_vector_size
);
9175 /* Function get_same_sized_vectype
9177 Returns a vector type corresponding to SCALAR_TYPE of size
9178 VECTOR_TYPE if supported by the target. */
9181 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9183 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9184 return build_same_sized_truth_vector_type (vector_type
);
9186 return get_vectype_for_scalar_type_and_size
9187 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9190 /* Function vect_is_simple_use.
9193 VINFO - the vect info of the loop or basic block that is being vectorized.
9194 OPERAND - operand in the loop or bb.
9196 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9197 DT - the type of definition
9199 Returns whether a stmt with OPERAND can be vectorized.
9200 For loops, supportable operands are constants, loop invariants, and operands
9201 that are defined by the current iteration of the loop. Unsupportable
9202 operands are those that are defined by a previous iteration of the loop (as
9203 is the case in reduction/induction computations).
9204 For basic blocks, supportable operands are constants and bb invariants.
9205 For now, operands defined outside the basic block are not supported. */
9208 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9209 gimple
**def_stmt
, enum vect_def_type
*dt
)
9212 *dt
= vect_unknown_def_type
;
9214 if (dump_enabled_p ())
9216 dump_printf_loc (MSG_NOTE
, vect_location
,
9217 "vect_is_simple_use: operand ");
9218 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9219 dump_printf (MSG_NOTE
, "\n");
9222 if (CONSTANT_CLASS_P (operand
))
9224 *dt
= vect_constant_def
;
9228 if (is_gimple_min_invariant (operand
))
9230 *dt
= vect_external_def
;
9234 if (TREE_CODE (operand
) != SSA_NAME
)
9236 if (dump_enabled_p ())
9237 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9242 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9244 *dt
= vect_external_def
;
9248 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9249 if (dump_enabled_p ())
9251 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9252 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
9255 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9256 *dt
= vect_external_def
;
9259 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9260 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9263 if (dump_enabled_p ())
9265 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9268 case vect_uninitialized_def
:
9269 dump_printf (MSG_NOTE
, "uninitialized\n");
9271 case vect_constant_def
:
9272 dump_printf (MSG_NOTE
, "constant\n");
9274 case vect_external_def
:
9275 dump_printf (MSG_NOTE
, "external\n");
9277 case vect_internal_def
:
9278 dump_printf (MSG_NOTE
, "internal\n");
9280 case vect_induction_def
:
9281 dump_printf (MSG_NOTE
, "induction\n");
9283 case vect_reduction_def
:
9284 dump_printf (MSG_NOTE
, "reduction\n");
9286 case vect_double_reduction_def
:
9287 dump_printf (MSG_NOTE
, "double reduction\n");
9289 case vect_nested_cycle
:
9290 dump_printf (MSG_NOTE
, "nested cycle\n");
9292 case vect_unknown_def_type
:
9293 dump_printf (MSG_NOTE
, "unknown\n");
9298 if (*dt
== vect_unknown_def_type
)
9300 if (dump_enabled_p ())
9301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9302 "Unsupported pattern.\n");
9306 switch (gimple_code (*def_stmt
))
9313 if (dump_enabled_p ())
9314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9315 "unsupported defining stmt:\n");
9322 /* Function vect_is_simple_use.
9324 Same as vect_is_simple_use but also determines the vector operand
9325 type of OPERAND and stores it to *VECTYPE. If the definition of
9326 OPERAND is vect_uninitialized_def, vect_constant_def or
9327 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9328 is responsible to compute the best suited vector type for the
9332 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9333 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
9335 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
9338 /* Now get a vector type if the def is internal, otherwise supply
9339 NULL_TREE and leave it up to the caller to figure out a proper
9340 type for the use stmt. */
9341 if (*dt
== vect_internal_def
9342 || *dt
== vect_induction_def
9343 || *dt
== vect_reduction_def
9344 || *dt
== vect_double_reduction_def
9345 || *dt
== vect_nested_cycle
)
9347 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
9349 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9350 && !STMT_VINFO_RELEVANT (stmt_info
)
9351 && !STMT_VINFO_LIVE_P (stmt_info
))
9352 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9354 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9355 gcc_assert (*vectype
!= NULL_TREE
);
9357 else if (*dt
== vect_uninitialized_def
9358 || *dt
== vect_constant_def
9359 || *dt
== vect_external_def
)
9360 *vectype
= NULL_TREE
;
9368 /* Function supportable_widening_operation
9370 Check whether an operation represented by the code CODE is a
9371 widening operation that is supported by the target platform in
9372 vector form (i.e., when operating on arguments of type VECTYPE_IN
9373 producing a result of type VECTYPE_OUT).
9375 Widening operations we currently support are NOP (CONVERT), FLOAT
9376 and WIDEN_MULT. This function checks if these operations are supported
9377 by the target platform either directly (via vector tree-codes), or via
9381 - CODE1 and CODE2 are codes of vector operations to be used when
9382 vectorizing the operation, if available.
9383 - MULTI_STEP_CVT determines the number of required intermediate steps in
9384 case of multi-step conversion (like char->short->int - in that case
9385 MULTI_STEP_CVT will be 1).
9386 - INTERM_TYPES contains the intermediate type required to perform the
9387 widening operation (short in the above example). */
9390 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9391 tree vectype_out
, tree vectype_in
,
9392 enum tree_code
*code1
, enum tree_code
*code2
,
9393 int *multi_step_cvt
,
9394 vec
<tree
> *interm_types
)
9396 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9397 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9398 struct loop
*vect_loop
= NULL
;
9399 machine_mode vec_mode
;
9400 enum insn_code icode1
, icode2
;
9401 optab optab1
, optab2
;
9402 tree vectype
= vectype_in
;
9403 tree wide_vectype
= vectype_out
;
9404 enum tree_code c1
, c2
;
9406 tree prev_type
, intermediate_type
;
9407 machine_mode intermediate_mode
, prev_mode
;
9408 optab optab3
, optab4
;
9410 *multi_step_cvt
= 0;
9412 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9416 case WIDEN_MULT_EXPR
:
9417 /* The result of a vectorized widening operation usually requires
9418 two vectors (because the widened results do not fit into one vector).
9419 The generated vector results would normally be expected to be
9420 generated in the same order as in the original scalar computation,
9421 i.e. if 8 results are generated in each vector iteration, they are
9422 to be organized as follows:
9423 vect1: [res1,res2,res3,res4],
9424 vect2: [res5,res6,res7,res8].
9426 However, in the special case that the result of the widening
9427 operation is used in a reduction computation only, the order doesn't
9428 matter (because when vectorizing a reduction we change the order of
9429 the computation). Some targets can take advantage of this and
9430 generate more efficient code. For example, targets like Altivec,
9431 that support widen_mult using a sequence of {mult_even,mult_odd}
9432 generate the following vectors:
9433 vect1: [res1,res3,res5,res7],
9434 vect2: [res2,res4,res6,res8].
9436 When vectorizing outer-loops, we execute the inner-loop sequentially
9437 (each vectorized inner-loop iteration contributes to VF outer-loop
9438 iterations in parallel). We therefore don't allow to change the
9439 order of the computation in the inner-loop during outer-loop
9441 /* TODO: Another case in which order doesn't *really* matter is when we
9442 widen and then contract again, e.g. (short)((int)x * y >> 8).
9443 Normally, pack_trunc performs an even/odd permute, whereas the
9444 repack from an even/odd expansion would be an interleave, which
9445 would be significantly simpler for e.g. AVX2. */
9446 /* In any case, in order to avoid duplicating the code below, recurse
9447 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9448 are properly set up for the caller. If we fail, we'll continue with
9449 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9451 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9452 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9453 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9454 stmt
, vectype_out
, vectype_in
,
9455 code1
, code2
, multi_step_cvt
,
9458 /* Elements in a vector with vect_used_by_reduction property cannot
9459 be reordered if the use chain with this property does not have the
9460 same operation. One such an example is s += a * b, where elements
9461 in a and b cannot be reordered. Here we check if the vector defined
9462 by STMT is only directly used in the reduction statement. */
9463 tree lhs
= gimple_assign_lhs (stmt
);
9464 use_operand_p dummy
;
9466 stmt_vec_info use_stmt_info
= NULL
;
9467 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9468 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9469 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9472 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9473 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9486 case VEC_WIDEN_MULT_EVEN_EXPR
:
9487 /* Support the recursion induced just above. */
9488 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9489 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9492 case WIDEN_LSHIFT_EXPR
:
9493 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9494 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9498 c1
= VEC_UNPACK_LO_EXPR
;
9499 c2
= VEC_UNPACK_HI_EXPR
;
9503 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9504 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9507 case FIX_TRUNC_EXPR
:
9508 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9509 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9510 computing the operation. */
9517 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9520 if (code
== FIX_TRUNC_EXPR
)
9522 /* The signedness is determined from output operand. */
9523 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9524 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9528 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9529 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9532 if (!optab1
|| !optab2
)
9535 vec_mode
= TYPE_MODE (vectype
);
9536 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9537 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
9543 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9544 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9545 /* For scalar masks we may have different boolean
9546 vector types having the same QImode. Thus we
9547 add additional check for elements number. */
9548 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9549 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9550 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9552 /* Check if it's a multi-step conversion that can be done using intermediate
9555 prev_type
= vectype
;
9556 prev_mode
= vec_mode
;
9558 if (!CONVERT_EXPR_CODE_P (code
))
9561 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9562 intermediate steps in promotion sequence. We try
9563 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9565 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9566 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9568 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9569 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9572 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9573 current_vector_size
);
9574 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9579 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9580 TYPE_UNSIGNED (prev_type
));
9582 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9583 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9585 if (!optab3
|| !optab4
9586 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9587 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9588 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9589 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9590 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9591 == CODE_FOR_nothing
)
9592 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9593 == CODE_FOR_nothing
))
9596 interm_types
->quick_push (intermediate_type
);
9597 (*multi_step_cvt
)++;
9599 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9600 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9601 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9602 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9603 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9605 prev_type
= intermediate_type
;
9606 prev_mode
= intermediate_mode
;
9609 interm_types
->release ();
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
9635 supportable_narrowing_operation (enum tree_code code
,
9636 tree vectype_out
, tree vectype_in
,
9637 enum tree_code
*code1
, int *multi_step_cvt
,
9638 vec
<tree
> *interm_types
)
9640 machine_mode vec_mode
;
9641 enum insn_code icode1
;
9642 optab optab1
, interm_optab
;
9643 tree vectype
= vectype_in
;
9644 tree narrow_vectype
= vectype_out
;
9646 tree intermediate_type
, prev_type
;
9647 machine_mode intermediate_mode
, prev_mode
;
9651 *multi_step_cvt
= 0;
9655 c1
= VEC_PACK_TRUNC_EXPR
;
9658 case FIX_TRUNC_EXPR
:
9659 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9663 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9664 tree code and optabs used for computing the operation. */
9671 if (code
== FIX_TRUNC_EXPR
)
9672 /* The signedness is determined from output operand. */
9673 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9675 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9680 vec_mode
= TYPE_MODE (vectype
);
9681 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9686 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9687 /* For scalar masks we may have different boolean
9688 vector types having the same QImode. Thus we
9689 add additional check for elements number. */
9690 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9691 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9692 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9694 /* Check if it's a multi-step conversion that can be done using intermediate
9696 prev_mode
= vec_mode
;
9697 prev_type
= vectype
;
9698 if (code
== FIX_TRUNC_EXPR
)
9699 uns
= TYPE_UNSIGNED (vectype_out
);
9701 uns
= TYPE_UNSIGNED (vectype
);
9703 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9704 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9705 costly than signed. */
9706 if (code
== FIX_TRUNC_EXPR
&& uns
)
9708 enum insn_code icode2
;
9711 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9713 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9714 if (interm_optab
!= unknown_optab
9715 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9716 && insn_data
[icode1
].operand
[0].mode
9717 == insn_data
[icode2
].operand
[0].mode
)
9720 optab1
= interm_optab
;
9725 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9726 intermediate steps in promotion sequence. We try
9727 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9728 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9729 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9731 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9732 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9735 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9736 current_vector_size
);
9737 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9742 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9744 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9747 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9748 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9749 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9750 == CODE_FOR_nothing
))
9753 interm_types
->quick_push (intermediate_type
);
9754 (*multi_step_cvt
)++;
9756 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9757 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9758 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9759 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9761 prev_mode
= intermediate_mode
;
9762 prev_type
= intermediate_type
;
9763 optab1
= interm_optab
;
9766 interm_types
->release ();