1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
57 /* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59 enum vec_load_store_type
{
65 /* Return the vectorized type for the given statement. */
68 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
70 return STMT_VINFO_VECTYPE (stmt_info
);
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
76 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
78 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
79 basic_block bb
= gimple_bb (stmt
);
80 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
86 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
88 return (bb
->loop_father
== loop
->inner
);
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
96 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
97 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
98 int misalign
, enum vect_cost_model_location where
)
100 if ((kind
== vector_load
|| kind
== unaligned_load
)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
102 kind
= vector_gather_load
;
103 if ((kind
== vector_store
|| kind
== unaligned_store
)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
105 kind
= vector_scatter_store
;
108 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
109 stmt_info_for_cost si
= { count
, kind
,
110 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
112 body_cost_vec
->safe_push (si
);
114 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
117 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
118 count
, kind
, stmt_info
, misalign
, where
);
121 /* Return a variable of type ELEM_TYPE[NELEMS]. */
124 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
126 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
130 /* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
136 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
137 tree array
, unsigned HOST_WIDE_INT n
)
139 tree vect_type
, vect
, vect_name
, array_ref
;
142 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
143 vect_type
= TREE_TYPE (TREE_TYPE (array
));
144 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
145 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
146 build_int_cst (size_type_node
, n
),
147 NULL_TREE
, NULL_TREE
);
149 new_stmt
= gimple_build_assign (vect
, array_ref
);
150 vect_name
= make_ssa_name (vect
, new_stmt
);
151 gimple_assign_set_lhs (new_stmt
, vect_name
);
152 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
162 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
163 tree array
, unsigned HOST_WIDE_INT n
)
168 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
169 build_int_cst (size_type_node
, n
),
170 NULL_TREE
, NULL_TREE
);
172 new_stmt
= gimple_build_assign (array_ref
, vect
);
173 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
176 /* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
181 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
185 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
198 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
199 enum vect_relevant relevant
, bool live_p
)
201 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
202 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
203 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
204 gimple
*pattern_stmt
;
206 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE
, vect_location
,
209 "mark relevant %d, live %d: ", relevant
, live_p
);
210 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern, in such cases the
216 stmt itself should be marked. */
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
224 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE
, vect_location
,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info
= vinfo_for_stmt (pattern_stmt
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
237 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
238 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
239 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
241 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE
, vect_location
,
246 "already marked relevant/live.\n");
250 worklist
->safe_push (stmt
);
254 /* Function is_simple_and_all_uses_invariant
256 Return true if STMT is simple and all uses of it are invariant. */
259 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
265 if (!is_gimple_assign (stmt
))
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt
)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
314 != loop_exit_ctrl_vec_info_type
)
315 *relevant
= vect_used_in_scope
;
317 /* changing memory. */
318 if (gimple_code (stmt
) != GIMPLE_PHI
)
319 if (gimple_vdef (stmt
)
320 && !gimple_clobber_p (stmt
))
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE
, vect_location
,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant
= vect_used_in_scope
;
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
331 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
333 basic_block bb
= gimple_bb (USE_STMT (use_p
));
334 if (!flow_bb_inside_loop_p (loop
, bb
))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE
, vect_location
,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 if (is_gimple_debug (USE_STMT (use_p
)))
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
346 gcc_assert (bb
== single_exit (loop
)->dest
);
353 if (*live_p
&& *relevant
== vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE
, vect_location
,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant
= vect_used_only_live
;
362 return (*live_p
|| *relevant
);
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
372 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
375 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info
))
383 /* STMT has a data_ref. FORNOW this means that its of one of
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt
))
398 if (is_gimple_call (stmt
)
399 && gimple_call_internal_p (stmt
))
400 switch (gimple_call_internal_fn (stmt
))
403 operand
= gimple_call_arg (stmt
, 3);
408 operand
= gimple_call_arg (stmt
, 2);
418 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
420 operand
= gimple_assign_rhs1 (stmt
);
421 if (TREE_CODE (operand
) != SSA_NAME
)
432 Function process_use.
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT cause it had already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
459 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
460 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
463 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
464 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
465 stmt_vec_info dstmt_vinfo
;
466 basic_block bb
, def_bb
;
468 enum vect_def_type dt
;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
475 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
479 "not vectorized: unsupported use in stmt.\n");
483 if (!def_stmt
|| gimple_nop_p (def_stmt
))
486 def_bb
= gimple_bb (def_stmt
);
487 if (!flow_bb_inside_loop_p (loop
, def_bb
))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
500 bb
= gimple_bb (stmt
);
501 if (gimple_code (stmt
) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
503 && gimple_code (def_stmt
) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
505 && bb
->loop_father
== def_bb
->loop_father
)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE
, vect_location
,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
511 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
525 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
533 case vect_unused_in_scope
:
534 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
535 vect_used_in_scope
: vect_unused_in_scope
;
538 case vect_used_in_outer_by_reduction
:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
540 relevant
= vect_used_by_reduction
;
543 case vect_used_in_outer
:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
545 relevant
= vect_used_in_scope
;
548 case vect_used_in_scope
:
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
563 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE
, vect_location
,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
571 case vect_unused_in_scope
:
572 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
574 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
577 case vect_used_by_reduction
:
578 case vect_used_only_live
:
579 relevant
= vect_used_in_outer_by_reduction
;
582 case vect_used_in_scope
:
583 relevant
= vect_used_in_outer
;
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
594 else if (gimple_code (stmt
) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE
, vect_location
,
602 "induction value on backedge.\n");
607 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
612 /* Function vect_mark_stmts_to_be_vectorized.
614 Not all stmts in the loop need to be vectorized. For example:
623 Stmt 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
626 This pass detects such stmts. */
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
631 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
632 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
633 unsigned int nbbs
= loop
->num_nodes
;
634 gimple_stmt_iterator si
;
637 stmt_vec_info stmt_vinfo
;
641 enum vect_relevant relevant
;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE
, vect_location
,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec
<gimple
*, 64> worklist
;
649 /* 1. Init worklist. */
650 for (i
= 0; i
< nbbs
; i
++)
653 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
662 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
663 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
665 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
667 stmt
= gsi_stmt (si
);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
674 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
675 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
679 /* 2. Process_worklist */
680 while (worklist
.length () > 0)
685 stmt
= worklist
.pop ();
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
695 stmt_vinfo
= vinfo_for_stmt (stmt
);
696 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
711 case vect_reduction_def
:
712 gcc_assert (relevant
!= vect_unused_in_scope
);
713 if (relevant
!= vect_unused_in_scope
714 && relevant
!= vect_used_in_scope
715 && relevant
!= vect_used_by_reduction
716 && relevant
!= vect_used_only_live
)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
720 "unsupported use of reduction.\n");
725 case vect_nested_cycle
:
726 if (relevant
!= vect_unused_in_scope
727 && relevant
!= vect_used_in_outer_by_reduction
728 && relevant
!= vect_used_in_outer
)
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
732 "unsupported use of nested cycle.\n");
738 case vect_double_reduction_def
:
739 if (relevant
!= vect_unused_in_scope
740 && relevant
!= vect_used_by_reduction
741 && relevant
!= vect_used_only_live
)
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
745 "unsupported use of double reduction.\n");
755 if (is_pattern_stmt_p (stmt_vinfo
))
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt
))
762 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
763 tree op
= gimple_assign_rhs1 (stmt
);
766 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
768 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
769 relevant
, &worklist
, false)
770 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
771 relevant
, &worklist
, false))
775 for (; i
< gimple_num_ops (stmt
); i
++)
777 op
= gimple_op (stmt
, i
);
778 if (TREE_CODE (op
) == SSA_NAME
779 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
784 else if (is_gimple_call (stmt
))
786 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
788 tree arg
= gimple_call_arg (stmt
, i
);
789 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
796 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
798 tree op
= USE_FROM_PTR (use_p
);
799 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
806 gather_scatter_info gs_info
;
807 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
809 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
813 } /* while worklist */
819 /* Function vect_model_simple_cost.
821 Models cost for simple operations, i.e. those that only emit ncopies of a
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
826 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
827 enum vect_def_type
*dt
,
829 stmt_vector_for_cost
*prologue_cost_vec
,
830 stmt_vector_for_cost
*body_cost_vec
)
833 int inside_cost
= 0, prologue_cost
= 0;
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info
))
839 /* Cost the "broadcast" of a scalar operand in to a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
842 for (i
= 0; i
< ndts
; i
++)
843 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
844 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
845 stmt_info
, 0, vect_prologue
);
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
848 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
849 stmt_info
, 0, vect_body
);
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_NOTE
, vect_location
,
853 "vect_model_simple_cost: inside_cost = %d, "
854 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
858 /* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
864 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
865 enum vect_def_type
*dt
, int pwr
)
868 int inside_cost
= 0, prologue_cost
= 0;
869 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
870 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
871 void *target_cost_data
;
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info
))
878 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
880 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
882 for (i
= 0; i
< pwr
+ 1; i
++)
884 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
886 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
887 vec_promote_demote
, stmt_info
, 0,
891 /* FORNOW: Assuming maximum 2 args per stmts. */
892 for (i
= 0; i
< 2; i
++)
893 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
894 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
895 stmt_info
, 0, vect_prologue
);
897 if (dump_enabled_p ())
898 dump_printf_loc (MSG_NOTE
, vect_location
,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
900 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
903 /* Function vect_model_store_cost
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
909 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
910 vect_memory_access_type memory_access_type
,
911 enum vect_def_type dt
, slp_tree slp_node
,
912 stmt_vector_for_cost
*prologue_cost_vec
,
913 stmt_vector_for_cost
*body_cost_vec
)
915 unsigned int inside_cost
= 0, prologue_cost
= 0;
916 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
917 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
918 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
920 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
921 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
922 stmt_info
, 0, vect_prologue
);
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node
&& grouped_access_p
)
928 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
929 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
937 /* We assume that the cost of a single store-lanes instruction is
938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
939 access is instead being provided by a permute-and-store operation,
940 include the cost of the permutes. */
942 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
944 /* Uses a high and low interleave or shuffle operations for each
946 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
947 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
948 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
949 stmt_info
, 0, vect_body
);
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE
, vect_location
,
953 "vect_model_store_cost: strided group_size = %d .\n",
957 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
958 /* Costs of the stores. */
959 if (memory_access_type
== VMAT_ELEMENTWISE
960 || memory_access_type
== VMAT_GATHER_SCATTER
)
961 /* N scalar stores plus extracting the elements. */
962 inside_cost
+= record_stmt_cost (body_cost_vec
,
963 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
964 scalar_store
, stmt_info
, 0, vect_body
);
966 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
968 if (memory_access_type
== VMAT_ELEMENTWISE
969 || memory_access_type
== VMAT_STRIDED_SLP
)
970 inside_cost
+= record_stmt_cost (body_cost_vec
,
971 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
972 vec_to_scalar
, stmt_info
, 0, vect_body
);
974 if (dump_enabled_p ())
975 dump_printf_loc (MSG_NOTE
, vect_location
,
976 "vect_model_store_cost: inside_cost = %d, "
977 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
981 /* Calculate cost of DR's memory access. */
983 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
984 unsigned int *inside_cost
,
985 stmt_vector_for_cost
*body_cost_vec
)
987 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
988 gimple
*stmt
= DR_STMT (dr
);
989 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
991 switch (alignment_support_scheme
)
995 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
996 vector_store
, stmt_info
, 0,
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE
, vect_location
,
1001 "vect_model_store_cost: aligned.\n");
1005 case dr_unaligned_supported
:
1007 /* Here, we assign an additional cost for the unaligned store. */
1008 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1009 unaligned_store
, stmt_info
,
1010 DR_MISALIGNMENT (dr
), vect_body
);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE
, vect_location
,
1013 "vect_model_store_cost: unaligned supported by "
1018 case dr_unaligned_unsupported
:
1020 *inside_cost
= VECT_MAX_COST
;
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1024 "vect_model_store_cost: unsupported access.\n");
1034 /* Function vect_model_load_cost
1036 Models cost for loads. In the case of grouped accesses, one access has
1037 the overhead of the grouped access attributed to it. Since unaligned
1038 accesses are supported for loads, we also account for the costs of the
1039 access scheme chosen. */
1042 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1043 vect_memory_access_type memory_access_type
,
1045 stmt_vector_for_cost
*prologue_cost_vec
,
1046 stmt_vector_for_cost
*body_cost_vec
)
1048 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1049 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1050 unsigned int inside_cost
= 0, prologue_cost
= 0;
1051 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1053 /* Grouped loads read all elements in the group at once,
1054 so we want the DR for the first statement. */
1055 if (!slp_node
&& grouped_access_p
)
1057 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1058 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1061 /* True if we should include any once-per-group costs as well as
1062 the cost of the statement itself. For SLP we only get called
1063 once per group anyhow. */
1064 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1066 /* We assume that the cost of a single load-lanes instruction is
1067 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1068 access is instead being provided by a load-and-permute operation,
1069 include the cost of the permutes. */
1071 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1073 /* Uses an even and odd extract operations or shuffle operations
1074 for each needed permute. */
1075 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1076 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1077 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1078 stmt_info
, 0, vect_body
);
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_NOTE
, vect_location
,
1082 "vect_model_load_cost: strided group_size = %d .\n",
1086 /* The loads themselves. */
1087 if (memory_access_type
== VMAT_ELEMENTWISE
1088 || memory_access_type
== VMAT_GATHER_SCATTER
)
1090 /* N scalar loads plus gathering them into a vector. */
1091 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1092 inside_cost
+= record_stmt_cost (body_cost_vec
,
1093 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1094 scalar_load
, stmt_info
, 0, vect_body
);
1097 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1098 &inside_cost
, &prologue_cost
,
1099 prologue_cost_vec
, body_cost_vec
, true);
1100 if (memory_access_type
== VMAT_ELEMENTWISE
1101 || memory_access_type
== VMAT_STRIDED_SLP
)
1102 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1103 stmt_info
, 0, vect_body
);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE
, vect_location
,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1112 /* Calculate cost of DR's memory access. */
/* Record the cost of performing NCOPIES vector loads through data
   reference DR.  Costs are accumulated into *INSIDE_COST and
   *PROLOGUE_COST, with individual entries appended to BODY_COST_VEC
   and PROLOGUE_COST_VEC via record_stmt_cost.  Dispatches on the
   alignment-support scheme computed by vect_supportable_dr_alignment.
   ADD_REALIGN_COST: for dr_explicit_realign_optimized, whether the
   one-off prologue setup (address load, initial load, optional mask)
   should be charged to this access — only one access in a group pays it.
   RECORD_PROLOGUE_COSTS: whether prologue costs may be recorded at all.
   NOTE(review): this chunk's extraction dropped some original lines
   (the dr_aligned case label, braces, break statements); verify control
   flow against the upstream file before editing.  */
1114 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1115 bool add_realign_cost
, unsigned int *inside_cost
,
1116 unsigned int *prologue_cost
,
1117 stmt_vector_for_cost
*prologue_cost_vec
,
1118 stmt_vector_for_cost
*body_cost_vec
,
1119 bool record_prologue_costs
)
1121 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1122 gimple
*stmt
= DR_STMT (dr
);
1123 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1125 switch (alignment_support_scheme
)
/* Aligned access: a plain vector_load per copy.  */
1129 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1130 stmt_info
, 0, vect_body
);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE
, vect_location
,
1134 "vect_model_load_cost: aligned.\n");
1138 case dr_unaligned_supported
:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1142 unaligned_load
, stmt_info
,
1143 DR_MISALIGNMENT (dr
), vect_body
);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_load_cost: unaligned supported by "
1152 case dr_explicit_realign
:
/* Two loads plus a permute per copy to realign in the loop body.  */
1154 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1155 vector_load
, stmt_info
, 0, vect_body
);
1156 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1157 vec_perm
, stmt_info
, 0, vect_body
);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1162 if (targetm
.vectorize
.builtin_mask_for_load
)
1163 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1164 stmt_info
, 0, vect_body
);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE
, vect_location
,
1168 "vect_model_load_cost: explicit realign\n");
1172 case dr_explicit_realign_optimized
:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE
, vect_location
,
1176 "vect_model_load_cost: unaligned software "
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost
&& record_prologue_costs
)
1188 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1189 vector_stmt
, stmt_info
,
1191 if (targetm
.vectorize
.builtin_mask_for_load
)
1192 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1193 vector_stmt
, stmt_info
,
/* Steady-state body cost: one load and one realignment permute per copy.  */
1197 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1198 stmt_info
, 0, vect_body
);
1199 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1200 stmt_info
, 0, vect_body
);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE
, vect_location
,
1204 "vect_model_load_cost: explicit realign optimized"
1210 case dr_unaligned_unsupported
:
/* Poison the cost so this access can never look profitable.  */
1212 *inside_cost
= VECT_MAX_COST
;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1216 "vect_model_load_cost: unsupported access.\n");
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
/* If GSI is non-null the insertion happens there (via
   vect_finish_stmt_generation, which also registers stmt_vec_info).
   Otherwise: for loop vectorization, insert on the preheader edge of
   the relevant loop (the outer loop when STMT is nested); for basic
   block vectorization, insert right after the labels of the block.  */
1229 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1232 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1235 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1236 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1240 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1244 if (nested_in_vect_loop_p (loop
, stmt
))
1247 pe
= loop_preheader_edge (loop
);
1248 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
/* Preheader insertion must not have needed a new basic block.  */
1249 gcc_assert (!new_bb
);
1253 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1255 gimple_stmt_iterator gsi_bb_start
;
1257 gcc_assert (bb_vinfo
);
1258 bb
= BB_VINFO_BB (bb_vinfo
);
1259 gsi_bb_start
= gsi_after_labels (bb
);
1260 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE
, vect_location
,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1272 /* Function vect_init_vector.
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276 vector type a vector with all elements equal to VAL is created first.
1277 Place the initialization at BSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
/* Three conversion paths before the final splat/copy:
   - boolean element type: select all-ones/all-zeros via COND_EXPR;
   - non-integral scalar: bit-reinterpret via VIEW_CONVERT_EXPR;
   - otherwise: ordinary NOP_EXPR conversion.  */
1283 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1288 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1289 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1291 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1292 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1294 /* Scalar boolean value should be transformed into
1295 all zeros or all ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type
))
1298 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1299 tree false_val
= build_zero_cst (TREE_TYPE (type
));
/* Constant booleans can be folded immediately; non-constants need
   a runtime COND_EXPR selecting between the two canonical values.  */
1301 if (CONSTANT_CLASS_P (val
))
1302 val
= integer_zerop (val
) ? false_val
: true_val
;
1305 new_temp
= make_ssa_name (TREE_TYPE (type
));
1306 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1307 val
, true_val
, false_val
);
1308 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1312 else if (CONSTANT_CLASS_P (val
))
1313 val
= fold_convert (TREE_TYPE (type
), val
);
1316 new_temp
= make_ssa_name (TREE_TYPE (type
));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1318 init_stmt
= gimple_build_assign (new_temp
,
1319 fold_build1 (VIEW_CONVERT_EXPR
,
1323 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1324 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Splat the (now element-typed) scalar across the vector.  */
1328 val
= build_vector_from_val (type
, val
);
1331 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1332 init_stmt
= gimple_build_assign (new_temp
, val
);
1333 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1337 /* Function vect_get_vec_def_for_operand_1.
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
/* Dispatches on the def type: constants/externals are handled by the
   caller (vect_get_vec_def_for_operand), internal defs read the lhs of
   the recorded vectorized stmt (falling back to the related pattern
   stmt when the original is an irrelevant pattern stmt), and
   reduction/induction/cycle defs come from the vectorized loop-header
   PHI.  */
1343 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1347 stmt_vec_info def_stmt_info
= NULL
;
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def
:
1353 case vect_external_def
:
1354 /* Code should use vect_get_vec_def_for_operand. */
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def
:
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1363 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1364 /* Get vectorized pattern statement. */
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1368 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1370 gcc_assert (vec_stmt
);
/* Extract the defined value: PHI result, call lhs, or assign lhs.  */
1371 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1372 vec_oprnd
= PHI_RESULT (vec_stmt
);
1373 else if (is_gimple_call (vec_stmt
))
1374 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1376 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def
:
1382 case vect_double_reduction_def
:
1383 case vect_nested_cycle
:
1384 case vect_induction_def
:
1386 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1390 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1391 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1392 vec_oprnd
= PHI_RESULT (vec_stmt
);
1394 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1404 /* Function vect_get_vec_def_for_operand.
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
1417 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1420 enum vect_def_type dt
;
1422 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1423 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1425 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE
, vect_location
,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1430 dump_printf (MSG_NOTE
, "\n");
/* Classify OP; vectorizability analysis guarantees this succeeds.  */
1433 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1434 gcc_assert (is_simple_use
);
1435 if (def_stmt
&& dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1441 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1443 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
/* Pick the vector type: explicit VECTYPE wins; a scalar boolean feeding
   a boolean vector stmt gets a same-sized truth vector; otherwise
   derive from OP's scalar type.  */
1447 vector_type
= vectype
;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1450 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1452 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1454 gcc_assert (vector_type
);
1455 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1458 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1521 gimple
*vec_stmt_for_operand
;
1522 stmt_vec_info def_stmt_info
;
1524 /* Do nothing; can reuse same def. */
1525 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Walk from the current vector def to its RELATED_STMT (the next copy)
   and return that copy's defined value.  */
1528 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1529 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1530 gcc_assert (def_stmt_info
);
1531 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1532 gcc_assert (vec_stmt_for_operand
);
1533 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1534 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1536 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* DT[0]/DT[1] give the def types of the first/second operand.  Each
   non-empty operand vector has its single entry replaced in place by
   the next-copy def (pop, advance, push).  VEC_OPRNDS1 may be null or
   empty when the stmt has only one vector operand.  */
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1546 vec
<tree
> *vec_oprnds0
,
1547 vec
<tree
> *vec_oprnds1
)
1549 tree vec_oprnd
= vec_oprnds0
->pop ();
1551 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1552 vec_oprnds0
->quick_push (vec_oprnd
);
1554 if (vec_oprnds1
&& vec_oprnds1
->length ())
1556 vec_oprnd
= vec_oprnds1
->pop ();
1557 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1558 vec_oprnds1
->quick_push (vec_oprnd
);
1563 /* Get vectorized definitions for OP0 and OP1. */
/* Two modes: with an SLP node, collect defs for all operands at once
   via vect_get_slp_defs; otherwise get a single vector def per operand
   via vect_get_vec_def_for_operand.  OP1 may be NULL_TREE for unary
   stmts, in which case *VEC_OPRNDS1 is left untouched.  Output vectors
   are (re)created here; callers own and must release them.  */
1566 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1567 vec
<tree
> *vec_oprnds0
,
1568 vec
<tree
> *vec_oprnds1
,
1573 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1574 auto_vec
<tree
> ops (nops
);
1575 auto_vec
<vec
<tree
> > vec_defs (nops
);
1577 ops
.quick_push (op0
);
1579 ops
.quick_push (op1
);
1581 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1583 *vec_oprnds0
= vec_defs
[0];
1585 *vec_oprnds1
= vec_defs
[1];
/* Non-SLP path: one def per operand.  */
1591 vec_oprnds0
->create (1);
1592 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1593 vec_oprnds0
->quick_push (vec_oprnd
);
1597 vec_oprnds1
->create (1);
1598 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1599 vec_oprnds1
->quick_push (vec_oprnd
);
1605 /* Function vect_finish_stmt_generation.
1607 Insert a new stmt. */
/* Insert VEC_STMT before *GSI as the vectorized form of scalar STMT.
   Also: keeps virtual SSA form up to date when inserting a store before
   a stmt that already has a vuse, registers a stmt_vec_info for
   VEC_STMT, copies STMT's source location, and propagates STMT's EH
   landing-pad region to VEC_STMT if it could throw.  */
1610 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1611 gimple_stmt_iterator
*gsi
)
1613 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1614 vec_info
*vinfo
= stmt_info
->vinfo
;
1616 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1618 if (!gsi_end_p (*gsi
)
1619 && gimple_has_mem_ops (vec_stmt
))
1621 gimple
*at_stmt
= gsi_stmt (*gsi
);
1622 tree vuse
= gimple_vuse (at_stmt
);
1623 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1625 tree vdef
= gimple_vdef (at_stmt
);
1626 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1632 && ((is_gimple_assign (vec_stmt
)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1634 || (is_gimple_call (vec_stmt
)
1635 && !(gimple_call_flags (vec_stmt
)
1636 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* VEC_STMT is a store: give it a fresh vdef and rewire the
   following stmt's vuse to chain through it.  */
1638 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1639 gimple_set_vdef (vec_stmt
, new_vdef
);
1640 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1644 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1646 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1648 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1654 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1660 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1661 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
1670 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1671 tree vectype_out
, tree vectype_in
)
/* Map CFN (or, failing that, FNDECL) to an internal function.  */
1674 if (internal_fn_p (cfn
))
1675 ifn
= as_internal_fn (cfn
)
;
1677 ifn
= associated_internal_fn (fndecl
);
1678 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1680 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1681 if (info
.vectorizable
)
/* type0/type1 < 0 means "use the output type" for that slot.  */
1683 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1684 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1685 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1686 OPTIMIZE_FOR_SPEED
))
1694 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1695 gimple_stmt_iterator
*);
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
/* Thin wrapper over tree_int_cst_compare on the DR's recorded step.  */
1702 compare_step_with_zero (gimple
*stmt
)
1704 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1705 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1706 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
/* Builds the selector { nunits-1, nunits-2, nunits-3, ... } as a
   single stepped pattern and asks the target whether that constant
   permutation is implementable before materializing the mask.  */
1714 perm_mask_for_reverse (tree vectype
)
1718 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1720 /* The encoding has a single stepped pattern. */
1721 vec_perm_builder
sel (nunits
, 1, 3);
1722 for (i
= 0; i
< 3; ++i
)
1723 sel
.quick_push (nunits
- 1 - i
);
1725 vec_perm_indices
indices (sel
, 1, nunits
);
1726 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1728 return vect_gen_perm_mask_checked (vectype
, indices
);
1731 /* A subroutine of get_load_store_type, with a subset of the same
1732 arguments. Handle the case where STMT is part of a grouped load
1735 For stores, the statements in the group are all consecutive
1736 and there is no gap at the end. For loads, the statements in the
1737 group might not be consecutive; there can be gaps between statements
1738 as well as at the end. */
/* Chooses *MEMORY_ACCESS_TYPE for a grouped access: VMAT_STRIDED_SLP /
   VMAT_ELEMENTWISE for strided groups, VMAT_CONTIGUOUS for SLP groups,
   and otherwise tries LOAD/STORE_LANES then permuting contiguous
   accesses before falling back to elementwise.  May set
   LOOP_VINFO_PEELING_FOR_GAPS when the chosen scheme overruns the
   group.  NOTE(review): the extraction of this chunk dropped several
   original lines (braces, returns, some condition heads); confirm exact
   control flow against the upstream file.  */
1741 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1742 vec_load_store_type vls_type
,
1743 vect_memory_access_type
*memory_access_type
)
1745 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1746 vec_info
*vinfo
= stmt_info
->vinfo
;
1747 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1748 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1749 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1750 data_reference
*first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1751 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1752 bool single_element_p
= (stmt
== first_stmt
1753 && !GROUP_NEXT_ELEMENT (stmt_info
));
1754 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1755 unsigned nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1757 /* True if the vectorized statements would access beyond the last
1758 statement in the group. */
1759 bool overrun_p
= false;
1761 /* True if we can cope with such overrun by peeling for gaps, so that
1762 there is at least one final scalar iteration after the vector loop. */
1763 bool can_overrun_p
= (vls_type
== VLS_LOAD
&& loop_vinfo
&& !loop
->inner
);
1765 /* There can only be a gap at the end of the group if the stride is
1766 known at compile time. */
1767 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1769 /* Stores can't yet have gaps. */
1770 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
1774 if (STMT_VINFO_STRIDED_P (stmt_info
))
1776 /* Try to use consecutive accesses of GROUP_SIZE elements,
1777 separated by the stride, until we have a complete vector.
1778 Fall back to scalar accesses if that isn't possible. */
1779 if (nunits
% group_size
== 0)
1780 *memory_access_type
= VMAT_STRIDED_SLP
;
1782 *memory_access_type
= VMAT_ELEMENTWISE
;
1786 overrun_p
= loop_vinfo
&& gap
!= 0;
1787 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1790 "Grouped store with gaps requires"
1791 " non-consecutive accesses\n");
1794 /* An overrun is fine if the trailing elements are smaller
1795 than the alignment boundary B. Every vector access will
1796 be a multiple of B and so we are guaranteed to access a
1797 non-gap element in the same B-sized block. */
1799 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1800 / vect_get_scalar_dr_size (first_dr
)))
1802 if (overrun_p
&& !can_overrun_p
)
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1806 "Peeling for outer loop is not supported\n");
1809 *memory_access_type
= VMAT_CONTIGUOUS
;
1814 /* We can always handle this case using elementwise accesses,
1815 but see if something more efficient is available. */
1816 *memory_access_type
= VMAT_ELEMENTWISE
;
1818 /* If there is a gap at the end of the group then these optimizations
1819 would access excess elements in the last iteration. */
1820 bool would_overrun_p
= (gap
!= 0);
1821 /* An overrun is fine if the trailing elements are smaller than the
1822 alignment boundary B. Every vector access will be a multiple of B
1823 and so we are guaranteed to access a non-gap element in the
1824 same B-sized block. */
1826 && gap
< (vect_known_alignment_in_bytes (first_dr
)
1827 / vect_get_scalar_dr_size (first_dr
)))
1828 would_overrun_p
= false;
1830 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1831 && (can_overrun_p
|| !would_overrun_p
)
1832 && compare_step_with_zero (stmt
) > 0)
1834 /* First try using LOAD/STORE_LANES. */
1835 if (vls_type
== VLS_LOAD
1836 ? vect_load_lanes_supported (vectype
, group_size
)
1837 : vect_store_lanes_supported (vectype
, group_size
))
1839 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
1840 overrun_p
= would_overrun_p
;
1843 /* If that fails, try using permuting loads. */
1844 if (*memory_access_type
== VMAT_ELEMENTWISE
1845 && (vls_type
== VLS_LOAD
1846 ? vect_grouped_load_supported (vectype
, single_element_p
,
1848 : vect_grouped_store_supported (vectype
, group_size
)))
1850 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
1851 overrun_p
= would_overrun_p
;
1856 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
1858 /* STMT is the leader of the group. Check the operands of all the
1859 stmts of the group. */
1860 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
1863 gcc_assert (gimple_assign_single_p (next_stmt
));
1864 tree op
= gimple_assign_rhs1 (next_stmt
);
1866 enum vect_def_type dt
;
1867 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1871 "use not simple.\n");
1874 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* The chosen scheme overruns the group: require gap peeling.  */
1880 gcc_assert (can_overrun_p
);
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1883 "Data access with gaps requires scalar "
1885 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
1891 /* A subroutine of get_load_store_type, with a subset of the same
1892 arguments. Handle the case where STMT is a load or store that
1893 accesses consecutive elements with a negative step. */
/* Returns the memory access type to use: VMAT_ELEMENTWISE when
   multiple copies are needed, alignment is required but unavailable,
   or the target cannot reverse a vector; VMAT_CONTIGUOUS_DOWN for an
   invariant-source store (no permute needed); otherwise
   VMAT_CONTIGUOUS_REVERSE.  */
1895 static vect_memory_access_type
1896 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1897 vec_load_store_type vls_type
,
1898 unsigned int ncopies
)
1900 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1901 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1902 dr_alignment_support alignment_support_scheme
;
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1908 "multiple types with negative step.\n");
1909 return VMAT_ELEMENTWISE
;
1912 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1913 if (alignment_support_scheme
!= dr_aligned
1914 && alignment_support_scheme
!= dr_unaligned_supported
)
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1918 "negative step but alignment required.\n");
1919 return VMAT_ELEMENTWISE
;
1922 if (vls_type
== VLS_STORE_INVARIANT
)
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE
, vect_location
,
1926 "negative step with invariant source;"
1927 " no permute needed.\n");
1928 return VMAT_CONTIGUOUS_DOWN
;
1931 if (!perm_mask_for_reverse (vectype
))
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1935 "negative step and reversing not supported.\n");
1936 return VMAT_ELEMENTWISE
;
1939 return VMAT_CONTIGUOUS_REVERSE
;
1942 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1943 if there is a memory access type that the vectorized form can use,
1944 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1945 or scatters, fill in GS_INFO accordingly.
1947 SLP says whether we're performing SLP rather than loop vectorization.
1948 VECTYPE is the vector type that the vectorized statements will use.
1949 NCOPIES is the number of vector statements that will be needed. */
/* Classification order: gather/scatter, grouped access, strided,
   then plain consecutive (negative step / invariant / contiguous).
   NOTE(review): this chunk's extraction dropped some original lines
   (returns, braces, some condition heads); verify against upstream.  */
1952 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1953 vec_load_store_type vls_type
, unsigned int ncopies
,
1954 vect_memory_access_type
*memory_access_type
,
1955 gather_scatter_info
*gs_info
)
1957 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1958 vec_info
*vinfo
= stmt_info
->vinfo
;
1959 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1962 *memory_access_type
= VMAT_GATHER_SCATTER
;
1964 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1966 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1967 &gs_info
->offset_dt
,
1968 &gs_info
->offset_vectype
))
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1972 "%s index use not simple.\n",
1973 vls_type
== VLS_LOAD
? "gather" : "scatter");
1977 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1979 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1980 memory_access_type
))
1983 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1986 *memory_access_type
= VMAT_ELEMENTWISE
;
/* Consecutive access: sign of the step selects the scheme.  */
1990 int cmp
= compare_step_with_zero (stmt
);
1992 *memory_access_type
= get_negative_load_store_type
1993 (stmt
, vectype
, vls_type
, ncopies
);
/* Zero step: only loads can be treated as invariant.  */
1996 gcc_assert (vls_type
== VLS_LOAD
);
1997 *memory_access_type
= VMAT_INVARIANT
;
2000 *memory_access_type
= VMAT_CONTIGUOUS
;
2003 /* FIXME: At the moment the cost model seems to underestimate the
2004 cost of using elementwise accesses. This check preserves the
2005 traditional behavior until that can be fixed. */
2006 if (*memory_access_type
== VMAT_ELEMENTWISE
2007 && !STMT_VINFO_STRIDED_P (stmt_info
))
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2011 "not falling back to elementwise accesses\n");
2017 /* Function vectorizable_mask_load_store.
2019 Check if STMT performs a conditional load or store that can be vectorized.
2020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2025 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2026 gimple
**vec_stmt
, slp_tree slp_node
)
2028 tree vec_dest
= NULL
;
2029 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2030 stmt_vec_info prev_stmt_info
;
2031 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2032 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2033 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2034 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2035 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2036 tree rhs_vectype
= NULL_TREE
;
2041 tree dataref_ptr
= NULL_TREE
;
2043 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2047 gather_scatter_info gs_info
;
2048 vec_load_store_type vls_type
;
2051 enum vect_def_type dt
;
2053 if (slp_node
!= NULL
)
2056 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2057 gcc_assert (ncopies
>= 1);
2059 mask
= gimple_call_arg (stmt
, 2);
2061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2064 /* FORNOW. This restriction should be relaxed. */
2065 if (nested_in_vect_loop
&& ncopies
> 1)
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2069 "multiple types in nested loop.");
2073 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2076 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2080 if (!STMT_VINFO_DATA_REF (stmt_info
))
2083 elem_type
= TREE_TYPE (vectype
);
2085 if (TREE_CODE (mask
) != SSA_NAME
)
2088 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2092 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2094 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2095 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2098 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2100 tree rhs
= gimple_call_arg (stmt
, 3);
2101 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2103 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2104 vls_type
= VLS_STORE_INVARIANT
;
2106 vls_type
= VLS_STORE
;
2109 vls_type
= VLS_LOAD
;
2111 vect_memory_access_type memory_access_type
;
2112 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2113 &memory_access_type
, &gs_info
))
2116 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2118 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2120 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2121 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2125 "masked gather with integer mask not supported.");
2129 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2133 "unsupported access type for masked %s.\n",
2134 vls_type
== VLS_LOAD
? "load" : "store");
2137 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2138 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2139 TYPE_MODE (mask_vectype
),
2140 vls_type
== VLS_LOAD
)
2142 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2145 if (!vec_stmt
) /* transformation not required. */
2147 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2148 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2149 if (vls_type
== VLS_LOAD
)
2150 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2153 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2154 dt
, NULL
, NULL
, NULL
);
2157 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2161 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2163 tree vec_oprnd0
= NULL_TREE
, op
;
2164 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2165 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2166 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2167 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2168 tree mask_perm_mask
= NULL_TREE
;
2169 edge pe
= loop_preheader_edge (loop
);
2172 enum { NARROW
, NONE
, WIDEN
} modifier
;
2173 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2175 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2176 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2177 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2178 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2179 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2180 scaletype
= TREE_VALUE (arglist
);
2181 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2182 && types_compatible_p (srctype
, masktype
));
2184 if (nunits
== gather_off_nunits
)
2186 else if (nunits
== gather_off_nunits
/ 2)
2190 vec_perm_builder
sel (gather_off_nunits
, gather_off_nunits
, 1);
2191 for (i
= 0; i
< gather_off_nunits
; ++i
)
2192 sel
.quick_push (i
| nunits
);
2194 vec_perm_indices
indices (sel
, 1, gather_off_nunits
);
2195 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
2198 else if (nunits
== gather_off_nunits
* 2)
2202 vec_perm_builder
sel (nunits
, nunits
, 1);
2203 sel
.quick_grow (nunits
);
2204 for (i
= 0; i
< nunits
; ++i
)
2205 sel
[i
] = i
< gather_off_nunits
2206 ? i
: i
+ nunits
- gather_off_nunits
;
2207 vec_perm_indices
indices (sel
, 2, nunits
);
2208 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2212 for (i
= 0; i
< nunits
; ++i
)
2213 sel
[i
] = i
| gather_off_nunits
;
2214 indices
.new_vector (sel
, 2, gather_off_nunits
);
2215 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2220 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2222 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2223 if (!is_gimple_min_invariant (ptr
))
2225 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2226 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2227 gcc_assert (!new_bb
);
2230 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2232 prev_stmt_info
= NULL
;
2233 for (j
= 0; j
< ncopies
; ++j
)
2235 if (modifier
== WIDEN
&& (j
& 1))
2236 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2237 perm_mask
, stmt
, gsi
);
2240 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2243 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2245 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2248 == TYPE_VECTOR_SUBPARTS (idxtype
));
2249 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2250 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2252 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2253 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2257 if (mask_perm_mask
&& (j
& 1))
2258 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2259 mask_perm_mask
, stmt
, gsi
);
2263 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2266 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2267 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2271 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2273 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2274 == TYPE_VECTOR_SUBPARTS (masktype
));
2275 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2276 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2278 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2279 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2285 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2288 if (!useless_type_conversion_p (vectype
, rettype
))
2290 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2291 == TYPE_VECTOR_SUBPARTS (rettype
));
2292 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2293 gimple_call_set_lhs (new_stmt
, op
);
2294 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2295 var
= make_ssa_name (vec_dest
);
2296 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2297 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2301 var
= make_ssa_name (vec_dest
, new_stmt
);
2302 gimple_call_set_lhs (new_stmt
, var
);
2305 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2307 if (modifier
== NARROW
)
2314 var
= permute_vec_elements (prev_res
, var
,
2315 perm_mask
, stmt
, gsi
);
2316 new_stmt
= SSA_NAME_DEF_STMT (var
);
2319 if (prev_stmt_info
== NULL
)
2320 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2322 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2323 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2326 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2328 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2330 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2331 stmt_info
= vinfo_for_stmt (stmt
);
2333 tree lhs
= gimple_call_lhs (stmt
);
2334 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2335 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2336 set_vinfo_for_stmt (stmt
, NULL
);
2337 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2338 gsi_replace (gsi
, new_stmt
, true);
2341 else if (vls_type
!= VLS_LOAD
)
2343 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2344 prev_stmt_info
= NULL
;
2345 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2346 for (i
= 0; i
< ncopies
; i
++)
2348 unsigned align
, misalign
;
2352 tree rhs
= gimple_call_arg (stmt
, 3);
2353 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2354 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2356 /* We should have catched mismatched types earlier. */
2357 gcc_assert (useless_type_conversion_p (vectype
,
2358 TREE_TYPE (vec_rhs
)));
2359 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2360 NULL_TREE
, &dummy
, gsi
,
2361 &ptr_incr
, false, &inv_p
);
2362 gcc_assert (!inv_p
);
2366 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2367 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2368 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2369 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2370 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2371 TYPE_SIZE_UNIT (vectype
));
2374 align
= DR_TARGET_ALIGNMENT (dr
);
2375 if (aligned_access_p (dr
))
2377 else if (DR_MISALIGNMENT (dr
) == -1)
2379 align
= TYPE_ALIGN_UNIT (elem_type
);
2383 misalign
= DR_MISALIGNMENT (dr
);
2384 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2386 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2387 misalign
? least_bit_hwi (misalign
) : align
);
2389 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2390 ptr
, vec_mask
, vec_rhs
);
2391 gimple_call_set_nothrow (call
, true);
2393 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2395 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2397 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2398 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2403 tree vec_mask
= NULL_TREE
;
2404 prev_stmt_info
= NULL
;
2405 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2406 for (i
= 0; i
< ncopies
; i
++)
2408 unsigned align
, misalign
;
2412 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2414 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2415 NULL_TREE
, &dummy
, gsi
,
2416 &ptr_incr
, false, &inv_p
);
2417 gcc_assert (!inv_p
);
2421 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2422 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2423 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2424 TYPE_SIZE_UNIT (vectype
));
2427 align
= DR_TARGET_ALIGNMENT (dr
);
2428 if (aligned_access_p (dr
))
2430 else if (DR_MISALIGNMENT (dr
) == -1)
2432 align
= TYPE_ALIGN_UNIT (elem_type
);
2436 misalign
= DR_MISALIGNMENT (dr
);
2437 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2439 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2440 misalign
? least_bit_hwi (misalign
) : align
);
2442 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2444 gimple_call_set_lhs (call
, make_ssa_name (vec_dest
));
2445 gimple_call_set_nothrow (call
, true);
2446 vect_finish_stmt_generation (stmt
, call
, gsi
);
2448 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= call
;
2450 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = call
;
2451 prev_stmt_info
= vinfo_for_stmt (call
);
2455 if (vls_type
== VLS_LOAD
)
2457 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2459 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2461 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2462 stmt_info
= vinfo_for_stmt (stmt
);
2464 tree lhs
= gimple_call_lhs (stmt
);
2465 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2466 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2467 set_vinfo_for_stmt (stmt
, NULL
);
2468 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2469 gsi_replace (gsi
, new_stmt
, true);
2475 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2478 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2479 gimple
**vec_stmt
, slp_tree slp_node
,
2480 tree vectype_in
, enum vect_def_type
*dt
)
2483 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2484 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2485 unsigned ncopies
, nunits
;
2487 op
= gimple_call_arg (stmt
, 0);
2488 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2489 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2491 /* Multiple types in SLP are handled by creating the appropriate number of
2492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2497 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2499 gcc_assert (ncopies
>= 1);
2501 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2505 unsigned int num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2506 unsigned word_bytes
= num_bytes
/ nunits
;
2508 /* The encoding uses one stepped pattern for each byte in the word. */
2509 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2510 for (unsigned i
= 0; i
< 3; ++i
)
2511 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2512 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2514 vec_perm_indices
indices (elts
, 1, num_bytes
);
2515 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2520 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2524 if (! PURE_SLP_STMT (stmt_info
))
2526 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2527 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2528 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2529 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2534 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2537 vec
<tree
> vec_oprnds
= vNULL
;
2538 gimple
*new_stmt
= NULL
;
2539 stmt_vec_info prev_stmt_info
= NULL
;
2540 for (unsigned j
= 0; j
< ncopies
; j
++)
2544 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
2546 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2548 /* Arguments are ready. create the new vector stmt. */
2551 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2553 tree tem
= make_ssa_name (char_vectype
);
2554 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2555 char_vectype
, vop
));
2556 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2557 tree tem2
= make_ssa_name (char_vectype
);
2558 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2559 tem
, tem
, bswap_vconst
);
2560 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2561 tem
= make_ssa_name (vectype
);
2562 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2564 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2566 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2573 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2577 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2580 vec_oprnds
.release ();
2584 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2585 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2586 in a single step. On success, store the binary pack code in
2590 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2591 tree_code
*convert_code
)
2593 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2594 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2598 int multi_step_cvt
= 0;
2599 auto_vec
<tree
, 8> interm_types
;
2600 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2601 &code
, &multi_step_cvt
,
2606 *convert_code
= code
;
2610 /* Function vectorizable_call.
2612 Check if GS performs a function call that can be vectorized.
2613 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2614 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2615 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2618 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2625 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2626 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2627 tree vectype_out
, vectype_in
;
2630 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2631 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2632 vec_info
*vinfo
= stmt_info
->vinfo
;
2633 tree fndecl
, new_temp
, rhs_type
;
2635 enum vect_def_type dt
[3]
2636 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2638 gimple
*new_stmt
= NULL
;
2640 vec
<tree
> vargs
= vNULL
;
2641 enum { NARROW
, NONE
, WIDEN
} modifier
;
2645 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2648 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2652 /* Is GS a vectorizable call? */
2653 stmt
= dyn_cast
<gcall
*> (gs
);
2657 if (gimple_call_internal_p (stmt
)
2658 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2659 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2660 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2663 if (gimple_call_lhs (stmt
) == NULL_TREE
2664 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2667 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2669 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2671 /* Process function arguments. */
2672 rhs_type
= NULL_TREE
;
2673 vectype_in
= NULL_TREE
;
2674 nargs
= gimple_call_num_args (stmt
);
2676 /* Bail out if the function has more than three arguments, we do not have
2677 interesting builtin functions to vectorize with more than two arguments
2678 except for fma. No arguments is also not good. */
2679 if (nargs
== 0 || nargs
> 3)
2682 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2683 if (gimple_call_internal_p (stmt
)
2684 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2687 rhs_type
= unsigned_type_node
;
2690 for (i
= 0; i
< nargs
; i
++)
2694 op
= gimple_call_arg (stmt
, i
);
2696 /* We can only handle calls with arguments of the same type. */
2698 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2702 "argument types differ.\n");
2706 rhs_type
= TREE_TYPE (op
);
2708 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2712 "use not simple.\n");
2717 vectype_in
= opvectype
;
2719 && opvectype
!= vectype_in
)
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2723 "argument vector types differ.\n");
2727 /* If all arguments are external or constant defs use a vector type with
2728 the same size as the output vector type. */
2730 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2732 gcc_assert (vectype_in
);
2735 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2738 "no vectype for scalar type ");
2739 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2740 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2747 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2748 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2749 if (nunits_in
== nunits_out
/ 2)
2751 else if (nunits_out
== nunits_in
)
2753 else if (nunits_out
== nunits_in
/ 2)
2758 /* We only handle functions that do not read or clobber memory. */
2759 if (gimple_vuse (stmt
))
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2763 "function reads from or writes to memory.\n");
2767 /* For now, we only vectorize functions if a target specific builtin
2768 is available. TODO -- in some cases, it might be profitable to
2769 insert the calls for pieces of the vector, in order to be able
2770 to vectorize other operations in the loop. */
2772 internal_fn ifn
= IFN_LAST
;
2773 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2774 tree callee
= gimple_call_fndecl (stmt
);
2776 /* First try using an internal function. */
2777 tree_code convert_code
= ERROR_MARK
;
2779 && (modifier
== NONE
2780 || (modifier
== NARROW
2781 && simple_integer_narrowing (vectype_out
, vectype_in
,
2783 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2786 /* If that fails, try asking for a target-specific built-in function. */
2787 if (ifn
== IFN_LAST
)
2789 if (cfn
!= CFN_LAST
)
2790 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2791 (cfn
, vectype_out
, vectype_in
);
2793 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2794 (callee
, vectype_out
, vectype_in
);
2797 if (ifn
== IFN_LAST
&& !fndecl
)
2799 if (cfn
== CFN_GOMP_SIMD_LANE
2802 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2803 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2804 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2805 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2807 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2808 { 0, 1, 2, ... vf - 1 } vector. */
2809 gcc_assert (nargs
== 0);
2811 else if (modifier
== NONE
2812 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2813 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2814 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2815 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2821 "function is not vectorizable.\n");
2828 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2829 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
2831 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
2833 /* Sanity check: make sure that at least one copy of the vectorized stmt
2834 needs to be generated. */
2835 gcc_assert (ncopies
>= 1);
2837 if (!vec_stmt
) /* transformation not required. */
2839 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2840 if (dump_enabled_p ())
2841 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2843 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2844 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2845 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2846 vec_promote_demote
, stmt_info
, 0, vect_body
);
2853 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2857 scalar_dest
= gimple_call_lhs (stmt
);
2858 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2860 prev_stmt_info
= NULL
;
2861 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2863 tree prev_res
= NULL_TREE
;
2864 for (j
= 0; j
< ncopies
; ++j
)
2866 /* Build argument list for the vectorized call. */
2868 vargs
.create (nargs
);
2874 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2875 vec
<tree
> vec_oprnds0
;
2877 for (i
= 0; i
< nargs
; i
++)
2878 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2879 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
2880 vec_oprnds0
= vec_defs
[0];
2882 /* Arguments are ready. Create the new vector stmt. */
2883 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2886 for (k
= 0; k
< nargs
; k
++)
2888 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2889 vargs
[k
] = vec_oprndsk
[i
];
2891 if (modifier
== NARROW
)
2893 tree half_res
= make_ssa_name (vectype_in
);
2895 = gimple_build_call_internal_vec (ifn
, vargs
);
2896 gimple_call_set_lhs (call
, half_res
);
2897 gimple_call_set_nothrow (call
, true);
2899 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2902 prev_res
= half_res
;
2905 new_temp
= make_ssa_name (vec_dest
);
2906 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2907 prev_res
, half_res
);
2912 if (ifn
!= IFN_LAST
)
2913 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2915 call
= gimple_build_call_vec (fndecl
, vargs
);
2916 new_temp
= make_ssa_name (vec_dest
, call
);
2917 gimple_call_set_lhs (call
, new_temp
);
2918 gimple_call_set_nothrow (call
, true);
2921 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2922 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2925 for (i
= 0; i
< nargs
; i
++)
2927 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2928 vec_oprndsi
.release ();
2933 for (i
= 0; i
< nargs
; i
++)
2935 op
= gimple_call_arg (stmt
, i
);
2938 = vect_get_vec_def_for_operand (op
, stmt
);
2941 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2943 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2946 vargs
.quick_push (vec_oprnd0
);
2949 if (gimple_call_internal_p (stmt
)
2950 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2952 tree_vector_builder
v (vectype_out
, 1, 3);
2953 for (int k
= 0; k
< 3; ++k
)
2954 v
.quick_push (build_int_cst (unsigned_type_node
,
2955 j
* nunits_out
+ k
));
2956 tree cst
= v
.build ();
2958 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2959 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2960 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2961 new_temp
= make_ssa_name (vec_dest
);
2962 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2964 else if (modifier
== NARROW
)
2966 tree half_res
= make_ssa_name (vectype_in
);
2967 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
2968 gimple_call_set_lhs (call
, half_res
);
2969 gimple_call_set_nothrow (call
, true);
2971 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2974 prev_res
= half_res
;
2977 new_temp
= make_ssa_name (vec_dest
);
2978 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2979 prev_res
, half_res
);
2984 if (ifn
!= IFN_LAST
)
2985 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2987 call
= gimple_build_call_vec (fndecl
, vargs
);
2988 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2989 gimple_call_set_lhs (call
, new_temp
);
2990 gimple_call_set_nothrow (call
, true);
2993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2995 if (j
== (modifier
== NARROW
? 1 : 0))
2996 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2998 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3000 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3003 else if (modifier
== NARROW
)
3005 for (j
= 0; j
< ncopies
; ++j
)
3007 /* Build argument list for the vectorized call. */
3009 vargs
.create (nargs
* 2);
3015 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3016 vec
<tree
> vec_oprnds0
;
3018 for (i
= 0; i
< nargs
; i
++)
3019 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3020 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3021 vec_oprnds0
= vec_defs
[0];
3023 /* Arguments are ready. Create the new vector stmt. */
3024 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3028 for (k
= 0; k
< nargs
; k
++)
3030 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3031 vargs
.quick_push (vec_oprndsk
[i
]);
3032 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3035 if (ifn
!= IFN_LAST
)
3036 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3038 call
= gimple_build_call_vec (fndecl
, vargs
);
3039 new_temp
= make_ssa_name (vec_dest
, call
);
3040 gimple_call_set_lhs (call
, new_temp
);
3041 gimple_call_set_nothrow (call
, true);
3043 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3044 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3047 for (i
= 0; i
< nargs
; i
++)
3049 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3050 vec_oprndsi
.release ();
3055 for (i
= 0; i
< nargs
; i
++)
3057 op
= gimple_call_arg (stmt
, i
);
3061 = vect_get_vec_def_for_operand (op
, stmt
);
3063 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3067 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3069 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3071 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3074 vargs
.quick_push (vec_oprnd0
);
3075 vargs
.quick_push (vec_oprnd1
);
3078 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3079 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3080 gimple_call_set_lhs (new_stmt
, new_temp
);
3081 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3084 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3086 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3088 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3091 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3094 /* No current target implements this case. */
3099 /* The call in STMT might prevent it from being removed in dce.
3100 We however cannot remove it here, due to the way the ssa name
3101 it defines is mapped to the new definition. So just replace
3102 rhs of the statement with something harmless. */
3107 type
= TREE_TYPE (scalar_dest
);
3108 if (is_pattern_stmt_p (stmt_info
))
3109 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3111 lhs
= gimple_call_lhs (stmt
);
3113 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3114 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3115 set_vinfo_for_stmt (stmt
, NULL
);
3116 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3117 gsi_replace (gsi
, new_stmt
, false);
3123 struct simd_call_arg_info
3127 HOST_WIDE_INT linear_step
;
3128 enum vect_def_type dt
;
3130 bool simd_lane_linear
;
3133 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3134 is linear within simd lane (but not within whole loop), note it in
3138 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3139 struct simd_call_arg_info
*arginfo
)
3141 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3143 if (!is_gimple_assign (def_stmt
)
3144 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3145 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3148 tree base
= gimple_assign_rhs1 (def_stmt
);
3149 HOST_WIDE_INT linear_step
= 0;
3150 tree v
= gimple_assign_rhs2 (def_stmt
);
3151 while (TREE_CODE (v
) == SSA_NAME
)
3154 def_stmt
= SSA_NAME_DEF_STMT (v
);
3155 if (is_gimple_assign (def_stmt
))
3156 switch (gimple_assign_rhs_code (def_stmt
))
3159 t
= gimple_assign_rhs2 (def_stmt
);
3160 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3162 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3163 v
= gimple_assign_rhs1 (def_stmt
);
3166 t
= gimple_assign_rhs2 (def_stmt
);
3167 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3169 linear_step
= tree_to_shwi (t
);
3170 v
= gimple_assign_rhs1 (def_stmt
);
3173 t
= gimple_assign_rhs1 (def_stmt
);
3174 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3175 || (TYPE_PRECISION (TREE_TYPE (v
))
3176 < TYPE_PRECISION (TREE_TYPE (t
))))
3185 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3187 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3188 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3193 arginfo
->linear_step
= linear_step
;
3195 arginfo
->simd_lane_linear
= true;
3201 /* Function vectorizable_simd_clone_call.
3203 Check if STMT performs a function call that can be vectorized
3204 by calling a simd clone of the function.
3205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3210 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3211 gimple
**vec_stmt
, slp_tree slp_node
)
3216 tree vec_oprnd0
= NULL_TREE
;
3217 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3219 unsigned int nunits
;
3220 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3221 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3222 vec_info
*vinfo
= stmt_info
->vinfo
;
3223 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3224 tree fndecl
, new_temp
;
3226 gimple
*new_stmt
= NULL
;
3228 auto_vec
<simd_call_arg_info
> arginfo
;
3229 vec
<tree
> vargs
= vNULL
;
3231 tree lhs
, rtype
, ratype
;
3232 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3234 /* Is STMT a vectorizable call? */
3235 if (!is_gimple_call (stmt
))
3238 fndecl
= gimple_call_fndecl (stmt
);
3239 if (fndecl
== NULL_TREE
)
3242 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3243 if (node
== NULL
|| node
->simd_clones
== NULL
)
3246 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3249 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3253 if (gimple_call_lhs (stmt
)
3254 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3257 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3259 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3261 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3268 /* Process function arguments. */
3269 nargs
= gimple_call_num_args (stmt
);
3271 /* Bail out if the function has zero arguments. */
3275 arginfo
.reserve (nargs
, true);
3277 for (i
= 0; i
< nargs
; i
++)
3279 simd_call_arg_info thisarginfo
;
3282 thisarginfo
.linear_step
= 0;
3283 thisarginfo
.align
= 0;
3284 thisarginfo
.op
= NULL_TREE
;
3285 thisarginfo
.simd_lane_linear
= false;
3287 op
= gimple_call_arg (stmt
, i
);
3288 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3289 &thisarginfo
.vectype
)
3290 || thisarginfo
.dt
== vect_uninitialized_def
)
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3294 "use not simple.\n");
3298 if (thisarginfo
.dt
== vect_constant_def
3299 || thisarginfo
.dt
== vect_external_def
)
3300 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3302 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3304 /* For linear arguments, the analyze phase should have saved
3305 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3306 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3307 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3309 gcc_assert (vec_stmt
);
3310 thisarginfo
.linear_step
3311 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3313 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3314 thisarginfo
.simd_lane_linear
3315 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3316 == boolean_true_node
);
3317 /* If loop has been peeled for alignment, we need to adjust it. */
3318 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3319 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3320 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3322 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3323 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3324 tree opt
= TREE_TYPE (thisarginfo
.op
);
3325 bias
= fold_convert (TREE_TYPE (step
), bias
);
3326 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3328 = fold_build2 (POINTER_TYPE_P (opt
)
3329 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3330 thisarginfo
.op
, bias
);
3334 && thisarginfo
.dt
!= vect_constant_def
3335 && thisarginfo
.dt
!= vect_external_def
3337 && TREE_CODE (op
) == SSA_NAME
3338 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3340 && tree_fits_shwi_p (iv
.step
))
3342 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3343 thisarginfo
.op
= iv
.base
;
3345 else if ((thisarginfo
.dt
== vect_constant_def
3346 || thisarginfo
.dt
== vect_external_def
)
3347 && POINTER_TYPE_P (TREE_TYPE (op
)))
3348 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3349 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3351 if (POINTER_TYPE_P (TREE_TYPE (op
))
3352 && !thisarginfo
.linear_step
3354 && thisarginfo
.dt
!= vect_constant_def
3355 && thisarginfo
.dt
!= vect_external_def
3358 && TREE_CODE (op
) == SSA_NAME
)
3359 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3361 arginfo
.quick_push (thisarginfo
);
3364 unsigned int badness
= 0;
3365 struct cgraph_node
*bestn
= NULL
;
3366 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3367 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3369 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3370 n
= n
->simdclone
->next_clone
)
3372 unsigned int this_badness
= 0;
3373 if (n
->simdclone
->simdlen
3374 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3375 || n
->simdclone
->nargs
!= nargs
)
3377 if (n
->simdclone
->simdlen
3378 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3379 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3380 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3381 if (n
->simdclone
->inbranch
)
3382 this_badness
+= 2048;
3383 int target_badness
= targetm
.simd_clone
.usable (n
);
3384 if (target_badness
< 0)
3386 this_badness
+= target_badness
* 512;
3387 /* FORNOW: Have to add code to add the mask argument. */
3388 if (n
->simdclone
->inbranch
)
3390 for (i
= 0; i
< nargs
; i
++)
3392 switch (n
->simdclone
->args
[i
].arg_type
)
3394 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3395 if (!useless_type_conversion_p
3396 (n
->simdclone
->args
[i
].orig_type
,
3397 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3399 else if (arginfo
[i
].dt
== vect_constant_def
3400 || arginfo
[i
].dt
== vect_external_def
3401 || arginfo
[i
].linear_step
)
3404 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3405 if (arginfo
[i
].dt
!= vect_constant_def
3406 && arginfo
[i
].dt
!= vect_external_def
)
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3411 if (arginfo
[i
].dt
== vect_constant_def
3412 || arginfo
[i
].dt
== vect_external_def
3413 || (arginfo
[i
].linear_step
3414 != n
->simdclone
->args
[i
].linear_step
))
3417 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3418 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3419 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3420 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3421 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3422 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3426 case SIMD_CLONE_ARG_TYPE_MASK
:
3429 if (i
== (size_t) -1)
3431 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3436 if (arginfo
[i
].align
)
3437 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3438 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3440 if (i
== (size_t) -1)
3442 if (bestn
== NULL
|| this_badness
< badness
)
3445 badness
= this_badness
;
3452 for (i
= 0; i
< nargs
; i
++)
3453 if ((arginfo
[i
].dt
== vect_constant_def
3454 || arginfo
[i
].dt
== vect_external_def
)
3455 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3458 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3460 if (arginfo
[i
].vectype
== NULL
3461 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3462 > bestn
->simdclone
->simdlen
))
3466 fndecl
= bestn
->decl
;
3467 nunits
= bestn
->simdclone
->simdlen
;
3468 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3470 /* If the function isn't const, only allow it in simd loops where user
3471 has asserted that at least nunits consecutive iterations can be
3472 performed using SIMD instructions. */
3473 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3474 && gimple_vuse (stmt
))
3477 /* Sanity check: make sure that at least one copy of the vectorized stmt
3478 needs to be generated. */
3479 gcc_assert (ncopies
>= 1);
3481 if (!vec_stmt
) /* transformation not required. */
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3484 for (i
= 0; i
< nargs
; i
++)
3485 if ((bestn
->simdclone
->args
[i
].arg_type
3486 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3487 || (bestn
->simdclone
->args
[i
].arg_type
3488 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3490 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3493 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3494 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3495 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3496 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3497 tree sll
= arginfo
[i
].simd_lane_linear
3498 ? boolean_true_node
: boolean_false_node
;
3499 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3501 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3502 if (dump_enabled_p ())
3503 dump_printf_loc (MSG_NOTE
, vect_location
,
3504 "=== vectorizable_simd_clone_call ===\n");
3505 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3515 scalar_dest
= gimple_call_lhs (stmt
);
3516 vec_dest
= NULL_TREE
;
3521 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3522 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3523 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3526 rtype
= TREE_TYPE (ratype
);
3530 prev_stmt_info
= NULL
;
3531 for (j
= 0; j
< ncopies
; ++j
)
3533 /* Build argument list for the vectorized call. */
3535 vargs
.create (nargs
);
3539 for (i
= 0; i
< nargs
; i
++)
3541 unsigned int k
, l
, m
, o
;
3543 op
= gimple_call_arg (stmt
, i
);
3544 switch (bestn
->simdclone
->args
[i
].arg_type
)
3546 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3547 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3548 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3549 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3551 if (TYPE_VECTOR_SUBPARTS (atype
)
3552 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3554 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3555 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3556 / TYPE_VECTOR_SUBPARTS (atype
));
3557 gcc_assert ((k
& (k
- 1)) == 0);
3560 = vect_get_vec_def_for_operand (op
, stmt
);
3563 vec_oprnd0
= arginfo
[i
].op
;
3564 if ((m
& (k
- 1)) == 0)
3566 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3569 arginfo
[i
].op
= vec_oprnd0
;
3571 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3573 bitsize_int ((m
& (k
- 1)) * prec
));
3575 = gimple_build_assign (make_ssa_name (atype
),
3577 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3578 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3582 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3583 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3584 gcc_assert ((k
& (k
- 1)) == 0);
3585 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3587 vec_alloc (ctor_elts
, k
);
3590 for (l
= 0; l
< k
; l
++)
3592 if (m
== 0 && l
== 0)
3594 = vect_get_vec_def_for_operand (op
, stmt
);
3597 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3599 arginfo
[i
].op
= vec_oprnd0
;
3602 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3606 vargs
.safe_push (vec_oprnd0
);
3609 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3611 = gimple_build_assign (make_ssa_name (atype
),
3613 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3614 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3619 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3620 vargs
.safe_push (op
);
3622 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3623 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3628 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3633 edge pe
= loop_preheader_edge (loop
);
3634 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3635 gcc_assert (!new_bb
);
3637 if (arginfo
[i
].simd_lane_linear
)
3639 vargs
.safe_push (arginfo
[i
].op
);
3642 tree phi_res
= copy_ssa_name (op
);
3643 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3644 set_vinfo_for_stmt (new_phi
,
3645 new_stmt_vec_info (new_phi
, loop_vinfo
));
3646 add_phi_arg (new_phi
, arginfo
[i
].op
,
3647 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3649 = POINTER_TYPE_P (TREE_TYPE (op
))
3650 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3651 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3652 ? sizetype
: TREE_TYPE (op
);
3654 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3656 tree tcst
= wide_int_to_tree (type
, cst
);
3657 tree phi_arg
= copy_ssa_name (op
);
3659 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3660 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3661 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3662 set_vinfo_for_stmt (new_stmt
,
3663 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3664 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3666 arginfo
[i
].op
= phi_res
;
3667 vargs
.safe_push (phi_res
);
3672 = POINTER_TYPE_P (TREE_TYPE (op
))
3673 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3674 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3675 ? sizetype
: TREE_TYPE (op
);
3677 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3679 tree tcst
= wide_int_to_tree (type
, cst
);
3680 new_temp
= make_ssa_name (TREE_TYPE (op
));
3681 new_stmt
= gimple_build_assign (new_temp
, code
,
3682 arginfo
[i
].op
, tcst
);
3683 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3684 vargs
.safe_push (new_temp
);
3687 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3688 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3689 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3690 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3691 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3692 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3698 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3701 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3703 new_temp
= create_tmp_var (ratype
);
3704 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3705 == TYPE_VECTOR_SUBPARTS (rtype
))
3706 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3708 new_temp
= make_ssa_name (rtype
, new_stmt
);
3709 gimple_call_set_lhs (new_stmt
, new_temp
);
3711 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3715 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3718 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3719 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3720 gcc_assert ((k
& (k
- 1)) == 0);
3721 for (l
= 0; l
< k
; l
++)
3726 t
= build_fold_addr_expr (new_temp
);
3727 t
= build2 (MEM_REF
, vectype
, t
,
3728 build_int_cst (TREE_TYPE (t
),
3729 l
* prec
/ BITS_PER_UNIT
));
3732 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3733 bitsize_int (prec
), bitsize_int (l
* prec
));
3735 = gimple_build_assign (make_ssa_name (vectype
), t
);
3736 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3737 if (j
== 0 && l
== 0)
3738 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3740 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3742 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3747 tree clobber
= build_constructor (ratype
, NULL
);
3748 TREE_THIS_VOLATILE (clobber
) = 1;
3749 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3750 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3754 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3756 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3757 / TYPE_VECTOR_SUBPARTS (rtype
));
3758 gcc_assert ((k
& (k
- 1)) == 0);
3759 if ((j
& (k
- 1)) == 0)
3760 vec_alloc (ret_ctor_elts
, k
);
3763 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3764 for (m
= 0; m
< o
; m
++)
3766 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3767 size_int (m
), NULL_TREE
, NULL_TREE
);
3769 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3770 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3771 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3772 gimple_assign_lhs (new_stmt
));
3774 tree clobber
= build_constructor (ratype
, NULL
);
3775 TREE_THIS_VOLATILE (clobber
) = 1;
3776 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3777 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3780 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3781 if ((j
& (k
- 1)) != k
- 1)
3783 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3785 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3786 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3788 if ((unsigned) j
== k
- 1)
3789 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3791 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3793 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3798 tree t
= build_fold_addr_expr (new_temp
);
3799 t
= build2 (MEM_REF
, vectype
, t
,
3800 build_int_cst (TREE_TYPE (t
), 0));
3802 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3803 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3804 tree clobber
= build_constructor (ratype
, NULL
);
3805 TREE_THIS_VOLATILE (clobber
) = 1;
3806 vect_finish_stmt_generation (stmt
,
3807 gimple_build_assign (new_temp
,
3813 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3815 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3817 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3822 /* The call in STMT might prevent it from being removed in dce.
3823 We however cannot remove it here, due to the way the ssa name
3824 it defines is mapped to the new definition. So just replace
3825 rhs of the statement with something harmless. */
3832 type
= TREE_TYPE (scalar_dest
);
3833 if (is_pattern_stmt_p (stmt_info
))
3834 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3836 lhs
= gimple_call_lhs (stmt
);
3837 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3840 new_stmt
= gimple_build_nop ();
3841 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3842 set_vinfo_for_stmt (stmt
, NULL
);
3843 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3844 gsi_replace (gsi
, new_stmt
, true);
3845 unlink_stmt_vdef (stmt
);
3851 /* Function vect_gen_widened_results_half
3853 Create a vector stmt whose code, type, number of arguments, and result
3854 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3855 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3856 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3857 needs to be created (DECL is a function-decl of a target-builtin).
3858 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): this extraction is missing several original lines (the DECL
   and STMT parameters, local declarations, else keywords, braces, and the
   final return of the built statement) -- TODO confirm against the full
   source file before relying on the literal text below.  */
3861 vect_gen_widened_results_half (enum tree_code code
,
3863 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3864 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3870 /* Generate half of the widened result: */
/* Dispatch on CODE: a CALL_EXPR means the widening is implemented by a
   target builtin call; anything else is an ordinary gimple assignment.  */
3871 if (code
== CALL_EXPR
)
3873 /* Target specific support */
/* Binary ops pass both vector operands to the builtin; unary ops only
   the first.  */
3874 if (op_type
== binary_op
)
3875 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3877 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
/* Give the call a fresh SSA name based on VEC_DEST as its lhs.  */
3878 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3879 gimple_call_set_lhs (new_stmt
, new_temp
);
3883 /* Generic support */
/* Sanity-check that OP_TYPE matches the arity of CODE before building
   the assignment form.  */
3884 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3885 if (op_type
!= binary_op
)
3887 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3888 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3889 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Insert the freshly built statement at GSI, recording vect info for it.  */
3891 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3897 /* Get vectorized definitions for loop-based vectorization. For the first
3898 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3899 scalar operand), and for the rest we get a copy with
3900 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3901 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3902 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): some original lines (local declarations, the update of
   *OPRND between the two defs, braces, the recursion guard) are missing
   from this extraction -- TODO confirm against the full source file.  */
3905 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3906 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3910 /* Get first vector operand. */
3911 /* All the vector operands except the very first one (that is scalar oprnd)
/* If *OPRND is still the scalar operand, create its initial vector def;
   otherwise it already holds the previous vector def and we get a copy.  */
3913 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3914 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3916 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3918 vec_oprnds
->quick_push (vec_oprnd
);
3920 /* Get second vector operand. */
/* Defs are produced in pairs; narrowing consumers take them two at a
   time.  */
3921 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3922 vec_oprnds
->quick_push (vec_oprnd
);
3926 /* For conversion in multiple steps, continue to get operands
/* Recurse with one fewer conversion step to gather the remaining defs.  */
3929 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3933 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3934 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): this extraction is missing a number of original lines
   (parts of the signature such as the VEC_DSTS parameter, local
   declarations, braces, and several else/if keywords) -- TODO confirm
   against the full source file before relying on the literal text.  */
3938 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3939 int multi_step_cvt
, gimple
*stmt
,
3941 gimple_stmt_iterator
*gsi
,
3942 slp_tree slp_node
, enum tree_code code
,
3943 stmt_vec_info
*prev_stmt_info
)
3946 tree vop0
, vop1
, new_tmp
, vec_dest
;
3948 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Each recursion level consumes one destination from the VEC_DSTS stack.  */
3950 vec_dest
= vec_dsts
.pop ();
/* Demotion packs two source vectors into one result, hence the stride
   of 2 over VEC_OPRNDS.  */
3952 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3954 /* Create demotion operation. */
3955 vop0
= (*vec_oprnds
)[i
];
3956 vop1
= (*vec_oprnds
)[i
+ 1];
3957 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3958 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3959 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3963 /* Store the resulting vector for next recursive call. */
/* Overwrite the input slot in place: pair (i, i+1) collapses to slot i/2,
   halving the operand count for the next level.  */
3964 (*vec_oprnds
)[i
/2] = new_tmp
;
3967 /* This is the last step of the conversion sequence. Store the
3968 vectors in SLP_NODE or in vector info of the scalar statement
3969 (or in STMT_VINFO_RELATED_STMT chain). */
3971 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Non-SLP: chain copies through STMT_VINFO_RELATED_STMT, with the first
   copy recorded as the stmt's vectorized stmt.  */
3974 if (!*prev_stmt_info
)
3975 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3977 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3979 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3984 /* For multi-step demotion operations we first generate demotion operations
3985 from the source type to the intermediate types, and then combine the
3986 results (stored in VEC_OPRNDS) in demotion operation to the destination
3990 /* At each level of recursion we have half of the operands we had at the
3992 vec_oprnds
->truncate ((i
+1)/2);
/* Recurse: intermediate levels always use VEC_PACK_TRUNC_EXPR as the
   demotion code.  */
3993 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3994 stmt
, vec_dsts
, gsi
, slp_node
,
3995 VEC_PACK_TRUNC_EXPR
,
/* Restore the destination popped above so sibling copies can reuse it.  */
3999 vec_dsts
.quick_push (vec_dest
);
4003 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4004 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4005 the resulting vectors and call the function recursively. */
/* NOTE(review): this extraction omits some original lines (loop-index
   declaration, braces, else keywords, the unary-op vop1 initialization)
   -- TODO confirm against the full source file.  */
4008 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4009 vec
<tree
> *vec_oprnds1
,
4010 gimple
*stmt
, tree vec_dest
,
4011 gimple_stmt_iterator
*gsi
,
4012 enum tree_code code1
,
4013 enum tree_code code2
, tree decl1
,
4014 tree decl2
, int op_type
)
4017 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4018 gimple
*new_stmt1
, *new_stmt2
;
4019 vec
<tree
> vec_tmp
= vNULL
;
/* Promotion doubles the number of result vectors: two halves per input.  */
4021 vec_tmp
.create (vec_oprnds0
->length () * 2);
4022 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
/* Binary ops take a matching second operand from VEC_OPRNDS1.  */
4024 if (op_type
== binary_op
)
4025 vop1
= (*vec_oprnds1
)[i
];
4029 /* Generate the two halves of promotion operation. */
/* CODE1/DECL1 produce the low half, CODE2/DECL2 the high half.  */
4030 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4031 op_type
, vec_dest
, gsi
, stmt
);
4032 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4033 op_type
, vec_dest
, gsi
, stmt
);
/* The halves are either builtin calls or plain assignments; fetch the
   lhs through the matching accessor.  */
4034 if (is_gimple_call (new_stmt1
))
4036 new_tmp1
= gimple_call_lhs (new_stmt1
);
4037 new_tmp2
= gimple_call_lhs (new_stmt2
);
4041 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4042 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4045 /* Store the results for the next step. */
4046 vec_tmp
.quick_push (new_tmp1
);
4047 vec_tmp
.quick_push (new_tmp2
);
/* Hand the doubled result set back to the caller through VEC_OPRNDS0,
   releasing the consumed inputs.  */
4050 vec_oprnds0
->release ();
4051 *vec_oprnds0
= vec_tmp
;
4055 /* Check if STMT performs a conversion operation, that can be vectorized.
4056 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4057 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4058 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4061 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4062 gimple
**vec_stmt
, slp_tree slp_node
)
4066 tree op0
, op1
= NULL_TREE
;
4067 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4068 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4069 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4070 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4071 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4072 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4075 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4077 gimple
*new_stmt
= NULL
;
4078 stmt_vec_info prev_stmt_info
;
4081 tree vectype_out
, vectype_in
;
4083 tree lhs_type
, rhs_type
;
4084 enum { NARROW
, NONE
, WIDEN
} modifier
;
4085 vec
<tree
> vec_oprnds0
= vNULL
;
4086 vec
<tree
> vec_oprnds1
= vNULL
;
4088 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4089 vec_info
*vinfo
= stmt_info
->vinfo
;
4090 int multi_step_cvt
= 0;
4091 vec
<tree
> interm_types
= vNULL
;
4092 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4094 unsigned short fltsz
;
4096 /* Is STMT a vectorizable conversion? */
4098 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4101 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4105 if (!is_gimple_assign (stmt
))
4108 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4111 code
= gimple_assign_rhs_code (stmt
);
4112 if (!CONVERT_EXPR_CODE_P (code
)
4113 && code
!= FIX_TRUNC_EXPR
4114 && code
!= FLOAT_EXPR
4115 && code
!= WIDEN_MULT_EXPR
4116 && code
!= WIDEN_LSHIFT_EXPR
)
4119 op_type
= TREE_CODE_LENGTH (code
);
4121 /* Check types of lhs and rhs. */
4122 scalar_dest
= gimple_assign_lhs (stmt
);
4123 lhs_type
= TREE_TYPE (scalar_dest
);
4124 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4126 op0
= gimple_assign_rhs1 (stmt
);
4127 rhs_type
= TREE_TYPE (op0
);
4129 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4130 && !((INTEGRAL_TYPE_P (lhs_type
)
4131 && INTEGRAL_TYPE_P (rhs_type
))
4132 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4133 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4136 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4137 && ((INTEGRAL_TYPE_P (lhs_type
)
4138 && !type_has_mode_precision_p (lhs_type
))
4139 || (INTEGRAL_TYPE_P (rhs_type
)
4140 && !type_has_mode_precision_p (rhs_type
))))
4142 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4144 "type conversion to/from bit-precision unsupported."
4149 /* Check the operands of the operation. */
4150 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4154 "use not simple.\n");
4157 if (op_type
== binary_op
)
4161 op1
= gimple_assign_rhs2 (stmt
);
4162 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4163 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4165 if (CONSTANT_CLASS_P (op0
))
4166 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4168 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4174 "use not simple.\n");
4179 /* If op0 is an external or constant defs use a vector type of
4180 the same size as the output vector type. */
4182 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4184 gcc_assert (vectype_in
);
4187 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4190 "no vectype for scalar type ");
4191 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4192 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4198 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4199 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4201 if (dump_enabled_p ())
4203 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4204 "can't convert between boolean and non "
4206 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4207 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4213 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4214 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4215 if (nunits_in
< nunits_out
)
4217 else if (nunits_out
== nunits_in
)
4222 /* Multiple types in SLP are handled by creating the appropriate number of
4223 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4227 else if (modifier
== NARROW
)
4228 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4230 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4232 /* Sanity check: make sure that at least one copy of the vectorized stmt
4233 needs to be generated. */
4234 gcc_assert (ncopies
>= 1);
4236 bool found_mode
= false;
4237 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4238 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4239 opt_scalar_mode rhs_mode_iter
;
4241 /* Supportable by target? */
4245 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4247 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4254 "conversion not supported by target.\n");
4258 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4259 &code1
, &code2
, &multi_step_cvt
,
4262 /* Binary widening operation can only be supported directly by the
4264 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4268 if (code
!= FLOAT_EXPR
4269 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4272 fltsz
= GET_MODE_SIZE (lhs_mode
);
4273 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4275 rhs_mode
= rhs_mode_iter
.require ();
4276 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4280 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4281 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4282 if (cvt_type
== NULL_TREE
)
4285 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4287 if (!supportable_convert_operation (code
, vectype_out
,
4288 cvt_type
, &decl1
, &codecvt1
))
4291 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4292 cvt_type
, &codecvt1
,
4293 &codecvt2
, &multi_step_cvt
,
4297 gcc_assert (multi_step_cvt
== 0);
4299 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4300 vectype_in
, &code1
, &code2
,
4301 &multi_step_cvt
, &interm_types
))
4311 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4312 codecvt2
= ERROR_MARK
;
4316 interm_types
.safe_push (cvt_type
);
4317 cvt_type
= NULL_TREE
;
4322 gcc_assert (op_type
== unary_op
);
4323 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4324 &code1
, &multi_step_cvt
,
4328 if (code
!= FIX_TRUNC_EXPR
4329 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4333 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4334 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4335 if (cvt_type
== NULL_TREE
)
4337 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4340 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4341 &code1
, &multi_step_cvt
,
4350 if (!vec_stmt
) /* transformation not required. */
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE
, vect_location
,
4354 "=== vectorizable_conversion ===\n");
4355 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4357 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4358 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4360 else if (modifier
== NARROW
)
4362 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4363 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4367 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4368 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4370 interm_types
.release ();
4375 if (dump_enabled_p ())
4376 dump_printf_loc (MSG_NOTE
, vect_location
,
4377 "transform conversion. ncopies = %d.\n", ncopies
);
4379 if (op_type
== binary_op
)
4381 if (CONSTANT_CLASS_P (op0
))
4382 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4383 else if (CONSTANT_CLASS_P (op1
))
4384 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4387 /* In case of multi-step conversion, we first generate conversion operations
4388 to the intermediate types, and then from that types to the final one.
4389 We create vector destinations for the intermediate type (TYPES) received
4390 from supportable_*_operation, and store them in the correct order
4391 for future use in vect_create_vectorized_*_stmts (). */
4392 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4393 vec_dest
= vect_create_destination_var (scalar_dest
,
4394 (cvt_type
&& modifier
== WIDEN
)
4395 ? cvt_type
: vectype_out
);
4396 vec_dsts
.quick_push (vec_dest
);
4400 for (i
= interm_types
.length () - 1;
4401 interm_types
.iterate (i
, &intermediate_type
); i
--)
4403 vec_dest
= vect_create_destination_var (scalar_dest
,
4405 vec_dsts
.quick_push (vec_dest
);
4410 vec_dest
= vect_create_destination_var (scalar_dest
,
4412 ? vectype_out
: cvt_type
);
4416 if (modifier
== WIDEN
)
4418 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4419 if (op_type
== binary_op
)
4420 vec_oprnds1
.create (1);
4422 else if (modifier
== NARROW
)
4423 vec_oprnds0
.create (
4424 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4426 else if (code
== WIDEN_LSHIFT_EXPR
)
4427 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4430 prev_stmt_info
= NULL
;
4434 for (j
= 0; j
< ncopies
; j
++)
4437 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4439 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4441 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4443 /* Arguments are ready, create the new vector stmt. */
4444 if (code1
== CALL_EXPR
)
4446 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4447 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4448 gimple_call_set_lhs (new_stmt
, new_temp
);
4452 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4453 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4454 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4455 gimple_assign_set_lhs (new_stmt
, new_temp
);
4458 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4460 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4463 if (!prev_stmt_info
)
4464 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4466 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4467 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4474 /* In case the vectorization factor (VF) is bigger than the number
4475 of elements that we can fit in a vectype (nunits), we have to
4476 generate more than one vector stmt - i.e - we need to "unroll"
4477 the vector stmt by a factor VF/nunits. */
4478 for (j
= 0; j
< ncopies
; j
++)
4485 if (code
== WIDEN_LSHIFT_EXPR
)
4490 /* Store vec_oprnd1 for every vector stmt to be created
4491 for SLP_NODE. We check during the analysis that all
4492 the shift arguments are the same. */
4493 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4494 vec_oprnds1
.quick_push (vec_oprnd1
);
4496 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4500 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4501 &vec_oprnds1
, slp_node
);
4505 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4506 vec_oprnds0
.quick_push (vec_oprnd0
);
4507 if (op_type
== binary_op
)
4509 if (code
== WIDEN_LSHIFT_EXPR
)
4512 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4513 vec_oprnds1
.quick_push (vec_oprnd1
);
4519 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4520 vec_oprnds0
.truncate (0);
4521 vec_oprnds0
.quick_push (vec_oprnd0
);
4522 if (op_type
== binary_op
)
4524 if (code
== WIDEN_LSHIFT_EXPR
)
4527 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4529 vec_oprnds1
.truncate (0);
4530 vec_oprnds1
.quick_push (vec_oprnd1
);
4534 /* Arguments are ready. Create the new vector stmts. */
4535 for (i
= multi_step_cvt
; i
>= 0; i
--)
4537 tree this_dest
= vec_dsts
[i
];
4538 enum tree_code c1
= code1
, c2
= code2
;
4539 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4544 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4546 stmt
, this_dest
, gsi
,
4547 c1
, c2
, decl1
, decl2
,
4551 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4555 if (codecvt1
== CALL_EXPR
)
4557 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4558 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4559 gimple_call_set_lhs (new_stmt
, new_temp
);
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4564 new_temp
= make_ssa_name (vec_dest
);
4565 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4572 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4575 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4578 if (!prev_stmt_info
)
4579 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4581 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4582 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4587 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4591 /* In case the vectorization factor (VF) is bigger than the number
4592 of elements that we can fit in a vectype (nunits), we have to
4593 generate more than one vector stmt - i.e - we need to "unroll"
4594 the vector stmt by a factor VF/nunits. */
4595 for (j
= 0; j
< ncopies
; j
++)
4599 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4603 vec_oprnds0
.truncate (0);
4604 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4605 vect_pow2 (multi_step_cvt
) - 1);
4608 /* Arguments are ready. Create the new vector stmts. */
4610 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4612 if (codecvt1
== CALL_EXPR
)
4614 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4615 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4616 gimple_call_set_lhs (new_stmt
, new_temp
);
4620 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4621 new_temp
= make_ssa_name (vec_dest
);
4622 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4626 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4627 vec_oprnds0
[i
] = new_temp
;
4630 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4631 stmt
, vec_dsts
, gsi
,
4636 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4640 vec_oprnds0
.release ();
4641 vec_oprnds1
.release ();
4642 interm_types
.release ();
/* NOTE(review): this chunk is a damaged extraction of GCC's
   tree-vect-stmts.c -- statements are wrapped mid-token and many
   original lines (braces, returns, halves of conditions) are missing.
   No code is altered below; comments only.  Restore the function body
   from the upstream file before attempting to compile.  */
4648 /* Function vectorizable_assignment.
4650 Check if STMT performs an assignment (copy) that can be vectorized.
4651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4652 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4656 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4657 gimple
**vec_stmt
, slp_tree slp_node
)
/* Local state: per-stmt vectorization info and the loop context it
   belongs to (may be NULL for basic-block SLP -- see bb_vinfo).  */
4662 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4663 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4666 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4670 vec
<tree
> vec_oprnds
= vNULL
;
4672 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4673 vec_info
*vinfo
= stmt_info
->vinfo
;
4674 gimple
*new_stmt
= NULL
;
4675 stmt_vec_info prev_stmt_info
= NULL
;
4676 enum tree_code code
;
/* Early-out gates: stmt must be relevant (or in a BB SLP region) and an
   internal def.  The rejecting branches were dropped by extraction.  */
4679 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4682 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4686 /* Is vectorizable assignment? */
4687 if (!is_gimple_assign (stmt
))
4690 scalar_dest
= gimple_assign_lhs (stmt
);
4691 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Accept plain copies, PAREN_EXPR and conversions; the RHS operand is
   unwrapped for VIEW_CONVERT_EXPR.  */
4694 code
= gimple_assign_rhs_code (stmt
);
4695 if (gimple_assign_single_p (stmt
)
4696 || code
== PAREN_EXPR
4697 || CONVERT_EXPR_CODE_P (code
))
4698 op
= gimple_assign_rhs1 (stmt
);
4702 if (code
== VIEW_CONVERT_EXPR
)
4703 op
= TREE_OPERAND (op
, 0);
4705 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4706 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4714 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4716 gcc_assert (ncopies
>= 1);
4718 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4722 "use not simple.\n");
4726 /* We can handle NOP_EXPR conversions that do not change the number
4727 of elements or the vector size. */
4728 if ((CONVERT_EXPR_CODE_P (code
)
4729 || code
== VIEW_CONVERT_EXPR
)
4731 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4732 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4733 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4736 /* We do not handle bit-precision changes. */
4737 if ((CONVERT_EXPR_CODE_P (code
)
4738 || code
== VIEW_CONVERT_EXPR
)
4739 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4740 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
4741 || !type_has_mode_precision_p (TREE_TYPE (op
)))
4742 /* But a conversion that does not change the bit-pattern is ok. */
4743 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4744 > TYPE_PRECISION (TREE_TYPE (op
)))
4745 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4746 /* Conversion between boolean types of different sizes is
4747 a simple assignment in case their vectypes are same
4749 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4750 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4754 "type conversion to/from bit-precision "
/* Analysis-only path: record the stmt type and cost model, no code gen.  */
4759 if (!vec_stmt
) /* transformation not required. */
4761 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE
, vect_location
,
4764 "=== vectorizable_assignment ===\n");
4765 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
/* Transform phase: emit NCOPIES vector copy stmts.  */
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4774 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4777 for (j
= 0; j
< ncopies
; j
++)
4781 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
4783 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4785 /* Arguments are ready. create the new vector stmt. */
4786 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4788 if (CONVERT_EXPR_CODE_P (code
)
4789 || code
== VIEW_CONVERT_EXPR
)
4790 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4791 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4792 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4793 gimple_assign_set_lhs (new_stmt
, new_temp
);
4794 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4796 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies via STMT_VINFO_RELATED_STMT so later stmts can find
   the per-copy defs.  */
4803 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4805 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4807 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4810 vec_oprnds
.release ();
/* NOTE(review): damaged extraction -- lines (e.g. the function's return
   type, braces, return stmts, the `if (!optab' halves) are missing.
   Comments only; code left byte-identical.  */
4815 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4816 either as shift by a scalar or by a vector. */
4819 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4822 machine_mode vec_mode
;
4827 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Try the vector-by-scalar optab first, then fall back to
   vector-by-vector (the `if' headers were truncated by extraction).  */
4831 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4833 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4835 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4837 || (optab_handler (optab
, TYPE_MODE (vectype
))
4838 == CODE_FOR_nothing
))
/* Final check: the chosen optab must have an insn for the vector mode.  */
4842 vec_mode
= TYPE_MODE (vectype
);
4843 icode
= (int) optab_handler (optab
, vec_mode
);
4844 if (icode
== CODE_FOR_nothing
)
/* NOTE(review): damaged extraction of GCC tree-vect-stmts.c -- many
   original lines are missing (braces, returns, `else' arms, parts of
   conditions).  No code is altered below; comments only.  */
4851 /* Function vectorizable_shift.
4853 Check if STMT performs a shift operation that can be vectorized.
4854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4855 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4859 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4860 gimple
**vec_stmt
, slp_tree slp_node
)
4864 tree op0
, op1
= NULL
;
4865 tree vec_oprnd1
= NULL_TREE
;
4866 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4868 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4869 enum tree_code code
;
4870 machine_mode vec_mode
;
4874 machine_mode optab_op2_mode
;
4876 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4878 gimple
*new_stmt
= NULL
;
4879 stmt_vec_info prev_stmt_info
;
4886 vec
<tree
> vec_oprnds0
= vNULL
;
4887 vec
<tree
> vec_oprnds1
= vNULL
;
/* scalar_shift_arg: true while we believe the shift amount can stay a
   scalar; the analysis below may flip it to force a vector amount.  */
4890 bool scalar_shift_arg
= true;
4891 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4892 vec_info
*vinfo
= stmt_info
->vinfo
;
4894 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4897 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4901 /* Is STMT a vectorizable binary/unary operation? */
4902 if (!is_gimple_assign (stmt
))
4905 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only shift/rotate codes are handled here.  */
4908 code
= gimple_assign_rhs_code (stmt
);
4910 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4911 || code
== RROTATE_EXPR
))
4914 scalar_dest
= gimple_assign_lhs (stmt
);
4915 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4916 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4920 "bit-precision shifts not supported.\n");
4924 op0
= gimple_assign_rhs1 (stmt
);
4925 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4929 "use not simple.\n");
4932 /* If op0 is an external or constant def use a vector type with
4933 the same size as the output vector type. */
4935 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4937 gcc_assert (vectype
);
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4942 "no vectype for scalar type\n");
4946 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4947 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4948 if (nunits_out
!= nunits_in
)
4951 op1
= gimple_assign_rhs2 (stmt
);
4952 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4956 "use not simple.\n");
4960 /* Multiple types in SLP are handled by creating the appropriate number of
4961 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4966 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4968 gcc_assert (ncopies
>= 1);
4970 /* Determine whether the shift amount is a vector, or scalar. If the
4971 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4973 if ((dt
[1] == vect_internal_def
4974 || dt
[1] == vect_induction_def
)
4976 scalar_shift_arg
= false;
4977 else if (dt
[1] == vect_constant_def
4978 || dt
[1] == vect_external_def
4979 || dt
[1] == vect_internal_def
)
4981 /* In SLP, need to check whether the shift count is the same,
4982 in loops if it is a constant or invariant, it is always
4986 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4989 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4990 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4991 scalar_shift_arg
= false;
4994 /* If the shift amount is computed by a pattern stmt we cannot
4995 use the scalar amount directly thus give up and use a vector
4997 if (dt
[1] == vect_internal_def
)
4999 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
5000 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
5001 scalar_shift_arg
= false;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5008 "operand mode requires invariant argument.\n");
/* Pick an optab: vector/vector when the amount is a vector, otherwise
   prefer vector/scalar and fall back to vector/vector.  */
5012 /* Vector shifted by vector. */
5013 if (!scalar_shift_arg
)
5015 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE
, vect_location
,
5018 "vector/vector shift/rotate found.\n");
5021 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5022 if (op1_vectype
== NULL_TREE
5023 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5027 "unusable type for last operand in"
5028 " vector/vector shift/rotate.\n");
5032 /* See if the machine has a vector shifted by scalar insn and if not
5033 then see if it has a vector shifted by vector insn. */
5036 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5038 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_NOTE
, vect_location
,
5042 "vector/scalar shift/rotate found.\n");
5046 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5048 && (optab_handler (optab
, TYPE_MODE (vectype
))
5049 != CODE_FOR_nothing
))
5051 scalar_shift_arg
= false;
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_NOTE
, vect_location
,
5055 "vector/vector shift/rotate found.\n");
5057 /* Unlike the other binary operators, shifts/rotates have
5058 the rhs being int, instead of the same type as the lhs,
5059 so make sure the scalar is the right type if we are
5060 dealing with vectors of long long/long/short/char. */
5061 if (dt
[1] == vect_constant_def
)
5062 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5063 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5067 && TYPE_MODE (TREE_TYPE (vectype
))
5068 != TYPE_MODE (TREE_TYPE (op1
)))
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5072 "unusable type for last operand in"
5073 " vector/vector shift/rotate.\n");
5076 if (vec_stmt
&& !slp_node
)
5078 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5079 op1
= vect_init_vector (stmt
, op1
,
5080 TREE_TYPE (vectype
), NULL
);
5087 /* Supportable by target? */
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5095 vec_mode
= TYPE_MODE (vectype
);
5096 icode
= (int) optab_handler (optab
, vec_mode
);
5097 if (icode
== CODE_FOR_nothing
)
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5101 "op not supported by target.\n");
5102 /* Check only during analysis. */
5103 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5105 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE
, vect_location
,
5109 "proceeding using word mode.\n");
5112 /* Worthwhile without SIMD support? Check only during analysis. */
5114 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5115 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5117 if (dump_enabled_p ())
5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5119 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record stmt type and cost; no code generated.  */
5123 if (!vec_stmt
) /* transformation not required. */
5125 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_NOTE
, vect_location
,
5128 "=== vectorizable_shift ===\n");
5129 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
/* Transform phase.  */
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_NOTE
, vect_location
,
5137 "transform binary/unary operation.\n");
5140 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5142 prev_stmt_info
= NULL
;
5143 for (j
= 0; j
< ncopies
; j
++)
5148 if (scalar_shift_arg
)
5150 /* Vector shl and shr insn patterns can be defined with scalar
5151 operand 2 (shift operand). In this case, use constant or loop
5152 invariant op1 directly, without extending it to vector mode
5154 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5155 if (!VECTOR_MODE_P (optab_op2_mode
))
5157 if (dump_enabled_p ())
5158 dump_printf_loc (MSG_NOTE
, vect_location
,
5159 "operand 1 using scalar mode.\n");
5161 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5162 vec_oprnds1
.quick_push (vec_oprnd1
);
5165 /* Store vec_oprnd1 for every vector stmt to be created
5166 for SLP_NODE. We check during the analysis that all
5167 the shift arguments are the same.
5168 TODO: Allow different constants for different vector
5169 stmts generated for an SLP instance. */
5170 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5171 vec_oprnds1
.quick_push (vec_oprnd1
);
5176 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5177 (a special case for certain kind of vector shifts); otherwise,
5178 operand 1 should be of a vector type (the usual case). */
5180 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5183 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5187 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5189 /* Arguments are ready. Create the new vector stmt. */
5190 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5192 vop1
= vec_oprnds1
[i
];
5193 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5194 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5195 gimple_assign_set_lhs (new_stmt
, new_temp
);
5196 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5198 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain copies via STMT_VINFO_RELATED_STMT for multi-copy unrolling.  */
5205 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5207 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5208 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5211 vec_oprnds0
.release ();
5212 vec_oprnds1
.release ();
/* NOTE(review): damaged extraction of GCC tree-vect-stmts.c -- many
   original lines are missing (braces, returns, `else' arms, parts of
   conditions).  No code is altered below; comments only.  */
5218 /* Function vectorizable_operation.
5220 Check if STMT performs a binary, unary or ternary operation that can
5222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5223 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5227 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5228 gimple
**vec_stmt
, slp_tree slp_node
)
5232 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5233 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5235 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5236 enum tree_code code
, orig_code
;
5237 machine_mode vec_mode
;
5241 bool target_support_p
;
5243 enum vect_def_type dt
[3]
5244 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5246 gimple
*new_stmt
= NULL
;
5247 stmt_vec_info prev_stmt_info
;
5253 vec
<tree
> vec_oprnds0
= vNULL
;
5254 vec
<tree
> vec_oprnds1
= vNULL
;
5255 vec
<tree
> vec_oprnds2
= vNULL
;
5256 tree vop0
, vop1
, vop2
;
5257 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5258 vec_info
*vinfo
= stmt_info
->vinfo
;
5260 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5263 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5267 /* Is STMT a vectorizable binary/unary operation? */
5268 if (!is_gimple_assign (stmt
))
5271 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* orig_code keeps the pre-normalization code; POINTER_DIFF_EXPR needs
   special result handling later (see vec_cvt_dest below).  */
5274 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5276 /* For pointer addition and subtraction, we should use the normal
5277 plus and minus for the vector operation. */
5278 if (code
== POINTER_PLUS_EXPR
)
5280 if (code
== POINTER_DIFF_EXPR
)
5283 /* Support only unary or binary operations. */
5284 op_type
= TREE_CODE_LENGTH (code
);
5285 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5287 if (dump_enabled_p ())
5288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5289 "num. args = %d (not unary/binary/ternary op).\n",
5294 scalar_dest
= gimple_assign_lhs (stmt
);
5295 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5297 /* Most operations cannot handle bit-precision types without extra
5299 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5300 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5301 /* Exception are bitwise binary operations. */
5302 && code
!= BIT_IOR_EXPR
5303 && code
!= BIT_XOR_EXPR
5304 && code
!= BIT_AND_EXPR
)
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5308 "bit-precision arithmetic not supported.\n");
5312 op0
= gimple_assign_rhs1 (stmt
);
5313 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5315 if (dump_enabled_p ())
5316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5317 "use not simple.\n");
5320 /* If op0 is an external or constant def use a vector type with
5321 the same size as the output vector type. */
5324 /* For boolean type we cannot determine vectype by
5325 invariant value (don't know whether it is a vector
5326 of booleans or vector of integers). We use output
5327 vectype because operations on boolean don't change
5329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5331 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5335 "not supported operation on bool value.\n");
5338 vectype
= vectype_out
;
5341 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5344 gcc_assert (vectype
);
5347 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5350 "no vectype for scalar type ");
5351 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5353 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5359 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5360 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5361 if (nunits_out
!= nunits_in
)
/* Validate the remaining operands (rhs2/rhs3) as simple uses.  */
5364 if (op_type
== binary_op
|| op_type
== ternary_op
)
5366 op1
= gimple_assign_rhs2 (stmt
);
5367 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5371 "use not simple.\n");
5375 if (op_type
== ternary_op
)
5377 op2
= gimple_assign_rhs3 (stmt
);
5378 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5382 "use not simple.\n");
5387 /* Multiple types in SLP are handled by creating the appropriate number of
5388 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5393 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5395 gcc_assert (ncopies
>= 1);
5397 /* Shifts are handled in vectorizable_shift (). */
5398 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5399 || code
== RROTATE_EXPR
)
5402 /* Supportable by target? */
5404 vec_mode
= TYPE_MODE (vectype
);
5405 if (code
== MULT_HIGHPART_EXPR
)
5406 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5409 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5417 target_support_p
= (optab_handler (optab
, vec_mode
)
5418 != CODE_FOR_nothing
);
5421 if (!target_support_p
)
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5425 "op not supported by target.\n");
5426 /* Check only during analysis. */
5427 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5428 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_NOTE
, vect_location
,
5432 "proceeding using word mode.\n");
5435 /* Worthwhile without SIMD support? Check only during analysis. */
5436 if (!VECTOR_MODE_P (vec_mode
)
5438 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5442 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record stmt type and cost; no code generated.  */
5446 if (!vec_stmt
) /* transformation not required. */
5448 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE
, vect_location
,
5451 "=== vectorizable_operation ===\n");
5452 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
/* Transform phase.  */
5458 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_NOTE
, vect_location
,
5460 "transform binary/unary operation.\n");
5463 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5465 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5466 vectors with unsigned elements, but the result is signed. So, we
5467 need to compute the MINUS_EXPR into vectype temporary and
5468 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5469 tree vec_cvt_dest
= NULL_TREE
;
5470 if (orig_code
== POINTER_DIFF_EXPR
)
5471 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5473 /* In case the vectorization factor (VF) is bigger than the number
5474 of elements that we can fit in a vectype (nunits), we have to generate
5475 more than one vector stmt - i.e - we need to "unroll" the
5476 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5477 from one copy of the vector stmt to the next, in the field
5478 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5479 stages to find the correct vector defs to be used when vectorizing
5480 stmts that use the defs of the current stmt. The example below
5481 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5482 we need to create 4 vectorized stmts):
5484 before vectorization:
5485 RELATED_STMT VEC_STMT
5489 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5491 RELATED_STMT VEC_STMT
5492 VS1_0: vx0 = memref0 VS1_1 -
5493 VS1_1: vx1 = memref1 VS1_2 -
5494 VS1_2: vx2 = memref2 VS1_3 -
5495 VS1_3: vx3 = memref3 - -
5496 S1: x = load - VS1_0
5499 step2: vectorize stmt S2 (done here):
5500 To vectorize stmt S2 we first need to find the relevant vector
5501 def for the first operand 'x'. This is, as usual, obtained from
5502 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5503 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5504 relevant vector def 'vx0'. Having found 'vx0' we can generate
5505 the vector stmt VS2_0, and as usual, record it in the
5506 STMT_VINFO_VEC_STMT of stmt S2.
5507 When creating the second copy (VS2_1), we obtain the relevant vector
5508 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5509 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5510 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5511 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5512 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5513 chain of stmts and pointers:
5514 RELATED_STMT VEC_STMT
5515 VS1_0: vx0 = memref0 VS1_1 -
5516 VS1_1: vx1 = memref1 VS1_2 -
5517 VS1_2: vx2 = memref2 VS1_3 -
5518 VS1_3: vx3 = memref3 - -
5519 S1: x = load - VS1_0
5520 VS2_0: vz0 = vx0 + v1 VS2_1 -
5521 VS2_1: vz1 = vx1 + v1 VS2_2 -
5522 VS2_2: vz2 = vx2 + v1 VS2_3 -
5523 VS2_3: vz3 = vx3 + v1 - -
5524 S2: z = x + 1 - VS2_0 */
5526 prev_stmt_info
= NULL
;
5527 for (j
= 0; j
< ncopies
; j
++)
5532 if (op_type
== binary_op
|| op_type
== ternary_op
)
5533 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5536 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5538 if (op_type
== ternary_op
)
5539 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5544 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5545 if (op_type
== ternary_op
)
5547 tree vec_oprnd
= vec_oprnds2
.pop ();
5548 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5553 /* Arguments are ready. Create the new vector stmt. */
5554 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5556 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5557 ? vec_oprnds1
[i
] : NULL_TREE
);
5558 vop2
= ((op_type
== ternary_op
)
5559 ? vec_oprnds2
[i
] : NULL_TREE
);
5560 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5561 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5562 gimple_assign_set_lhs (new_stmt
, new_temp
);
5563 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* POINTER_DIFF_EXPR path: view-convert the unsigned result into the
   signed vectype_out (guard condition truncated by extraction).  */
5566 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
5567 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
5569 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
5570 gimple_assign_set_lhs (new_stmt
, new_temp
);
5571 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5574 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5581 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5583 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5584 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5587 vec_oprnds0
.release ();
5588 vec_oprnds1
.release ();
5589 vec_oprnds2
.release ();
/* NOTE(review): damaged extraction -- braces and the `else' arm
   separating the symtab and non-symtab paths are missing.  Comments
   only; code left byte-identical.  */
5594 /* A helper function to ensure data reference DR's base alignment. */
5597 ensure_base_align (struct data_reference
*dr
)
5602 if (DR_VECT_AUX (dr
)->base_misaligned
)
5604 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
/* Target alignment is tracked in bytes; convert to bits for the
   DECL/symtab alignment setters.  */
5606 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
5608 if (decl_in_symtab_p (base_decl
))
5609 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
/* Non-symtab decl: force the alignment directly and mark it
   user-specified so later passes do not shrink it.  */
5612 SET_DECL_ALIGN (base_decl
, align_base_to
);
5613 DECL_USER_ALIGN (base_decl
) = 1;
5615 DR_VECT_AUX (dr
)->base_misaligned
= false;
/* NOTE(review): damaged extraction -- the loop header walking
   GROUP_NEXT_ELEMENT and several braces are missing.  Comments only;
   code left byte-identical.  */
5620 /* Function get_group_alias_ptr_type.
5622 Return the alias type for the group starting at FIRST_STMT. */
5625 get_group_alias_ptr_type (gimple
*first_stmt
)
5627 struct data_reference
*first_dr
, *next_dr
;
5630 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5631 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
/* Walk the rest of the interleaving group; if any member's DR has a
   different alias set, fall back to ptr_type_node (alias set 0).  */
5634 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5635 if (get_alias_set (DR_REF (first_dr
))
5636 != get_alias_set (DR_REF (next_dr
)))
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE
, vect_location
,
5640 "conflicting alias set types.\n");
5641 return ptr_type_node
;
5643 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* All members agree: use the first reference's alias pointer type.  */
5645 return reference_alias_ptr_type (DR_REF (first_dr
));
5649 /* Function vectorizable_store.
5651 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5653 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5654 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5655 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5658 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5664 tree vec_oprnd
= NULL_TREE
;
5665 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5666 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5668 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5669 struct loop
*loop
= NULL
;
5670 machine_mode vec_mode
;
5672 enum dr_alignment_support alignment_support_scheme
;
5674 enum vect_def_type dt
;
5675 stmt_vec_info prev_stmt_info
= NULL
;
5676 tree dataref_ptr
= NULL_TREE
;
5677 tree dataref_offset
= NULL_TREE
;
5678 gimple
*ptr_incr
= NULL
;
5681 gimple
*next_stmt
, *first_stmt
;
5683 unsigned int group_size
, i
;
5684 vec
<tree
> oprnds
= vNULL
;
5685 vec
<tree
> result_chain
= vNULL
;
5687 tree offset
= NULL_TREE
;
5688 vec
<tree
> vec_oprnds
= vNULL
;
5689 bool slp
= (slp_node
!= NULL
);
5690 unsigned int vec_num
;
5691 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5692 vec_info
*vinfo
= stmt_info
->vinfo
;
5694 gather_scatter_info gs_info
;
5695 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5698 vec_load_store_type vls_type
;
5701 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5704 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5708 /* Is vectorizable store? */
5710 if (!is_gimple_assign (stmt
))
5713 scalar_dest
= gimple_assign_lhs (stmt
);
5714 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5715 && is_pattern_stmt_p (stmt_info
))
5716 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5717 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5718 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5719 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5720 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5721 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5722 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5723 && TREE_CODE (scalar_dest
) != MEM_REF
)
5726 /* Cannot have hybrid store SLP -- that would mean storing to the
5727 same location twice. */
5728 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5730 gcc_assert (gimple_assign_single_p (stmt
));
5732 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5733 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5737 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5738 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5743 /* Multiple types in SLP are handled by creating the appropriate number of
5744 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5749 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5751 gcc_assert (ncopies
>= 1);
5753 /* FORNOW. This restriction should be relaxed. */
5754 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5756 if (dump_enabled_p ())
5757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5758 "multiple types in nested loop.\n");
5762 op
= gimple_assign_rhs1 (stmt
);
5764 /* In the case this is a store from a constant make sure
5765 native_encode_expr can handle it. */
5766 if (CONSTANT_CLASS_P (op
) && native_encode_expr (op
, NULL
, 64) == 0)
5769 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5773 "use not simple.\n");
5777 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5778 vls_type
= VLS_STORE_INVARIANT
;
5780 vls_type
= VLS_STORE
;
5782 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5785 elem_type
= TREE_TYPE (vectype
);
5786 vec_mode
= TYPE_MODE (vectype
);
5788 /* FORNOW. In some cases can vectorize even if data-type not supported
5789 (e.g. - array initialization with 0). */
5790 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5793 if (!STMT_VINFO_DATA_REF (stmt_info
))
5796 vect_memory_access_type memory_access_type
;
5797 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5798 &memory_access_type
, &gs_info
))
5801 if (!vec_stmt
) /* transformation not required. */
5803 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5804 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5805 /* The SLP costs are calculated during SLP analysis. */
5806 if (!PURE_SLP_STMT (stmt_info
))
5807 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5811 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5815 ensure_base_align (dr
);
5817 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5819 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5820 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5821 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5822 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5823 edge pe
= loop_preheader_edge (loop
);
5826 enum { NARROW
, NONE
, WIDEN
} modifier
;
5827 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5829 if (nunits
== (unsigned int) scatter_off_nunits
)
5831 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5835 vec_perm_builder
sel (scatter_off_nunits
, scatter_off_nunits
, 1);
5836 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5837 sel
.quick_push (i
| nunits
);
5839 vec_perm_indices
indices (sel
, 1, scatter_off_nunits
);
5840 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
5842 gcc_assert (perm_mask
!= NULL_TREE
);
5844 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5848 vec_perm_builder
sel (nunits
, nunits
, 1);
5849 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5850 sel
.quick_push (i
| scatter_off_nunits
);
5852 vec_perm_indices
indices (sel
, 2, nunits
);
5853 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
5854 gcc_assert (perm_mask
!= NULL_TREE
);
5860 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5861 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5862 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5863 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5864 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5865 scaletype
= TREE_VALUE (arglist
);
5867 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5868 && TREE_CODE (rettype
) == VOID_TYPE
);
5870 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5871 if (!is_gimple_min_invariant (ptr
))
5873 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5874 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5875 gcc_assert (!new_bb
);
5878 /* Currently we support only unconditional scatter stores,
5879 so mask should be all ones. */
5880 mask
= build_int_cst (masktype
, -1);
5881 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5883 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5885 prev_stmt_info
= NULL
;
5886 for (j
= 0; j
< ncopies
; ++j
)
5891 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5893 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5895 else if (modifier
!= NONE
&& (j
& 1))
5897 if (modifier
== WIDEN
)
5900 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5901 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5904 else if (modifier
== NARROW
)
5906 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5909 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5918 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5920 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5924 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5927 == TYPE_VECTOR_SUBPARTS (srctype
));
5928 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5929 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5930 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5931 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5935 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5937 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5938 == TYPE_VECTOR_SUBPARTS (idxtype
));
5939 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5940 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5941 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5942 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5947 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5949 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5951 if (prev_stmt_info
== NULL
)
5952 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5954 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5955 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5960 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5963 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5964 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5965 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5967 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5970 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5972 /* We vectorize all the stmts of the interleaving group when we
5973 reach the last stmt in the group. */
5974 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5975 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5984 grouped_store
= false;
5985 /* VEC_NUM is the number of vect stmts to be created for this
5987 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5988 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5989 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5990 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5991 op
= gimple_assign_rhs1 (first_stmt
);
5994 /* VEC_NUM is the number of vect stmts to be created for this
5996 vec_num
= group_size
;
5998 ref_type
= get_group_alias_ptr_type (first_stmt
);
6004 group_size
= vec_num
= 1;
6005 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_NOTE
, vect_location
,
6010 "transform store. ncopies = %d\n", ncopies
);
6012 if (memory_access_type
== VMAT_ELEMENTWISE
6013 || memory_access_type
== VMAT_STRIDED_SLP
)
6015 gimple_stmt_iterator incr_gsi
;
6021 gimple_seq stmts
= NULL
;
6022 tree stride_base
, stride_step
, alias_off
;
6026 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6029 = fold_build_pointer_plus
6030 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6031 size_binop (PLUS_EXPR
,
6032 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6033 convert_to_ptrofftype (DR_INIT (first_dr
))));
6034 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
6036 /* For a store with loop-invariant (but other than power-of-2)
6037 stride (i.e. not a grouped access) like so:
6039 for (i = 0; i < n; i += stride)
6042 we generate a new induction variable and new stores from
6043 the components of the (vectorized) rhs:
6045 for (j = 0; ; j += VF*stride)
6050 array[j + stride] = tmp2;
6054 unsigned nstores
= nunits
;
6056 tree ltype
= elem_type
;
6057 tree lvectype
= vectype
;
6060 if (group_size
< nunits
6061 && nunits
% group_size
== 0)
6063 nstores
= nunits
/ group_size
;
6065 ltype
= build_vector_type (elem_type
, group_size
);
6068 /* First check if vec_extract optab doesn't support extraction
6069 of vector elts directly. */
6070 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6072 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6073 || !VECTOR_MODE_P (vmode
)
6074 || (convert_optab_handler (vec_extract_optab
,
6075 TYPE_MODE (vectype
), vmode
)
6076 == CODE_FOR_nothing
))
6078 /* Try to avoid emitting an extract of vector elements
6079 by performing the extracts using an integer type of the
6080 same size, extracting from a vector of those and then
6081 re-interpreting it as the original vector type if
6084 = group_size
* GET_MODE_BITSIZE (elmode
);
6085 elmode
= int_mode_for_size (lsize
, 0).require ();
6086 /* If we can't construct such a vector fall back to
6087 element extracts from the original vector type and
6088 element size stores. */
6089 if (mode_for_vector (elmode
,
6090 nunits
/ group_size
).exists (&vmode
)
6091 && VECTOR_MODE_P (vmode
)
6092 && (convert_optab_handler (vec_extract_optab
,
6094 != CODE_FOR_nothing
))
6096 nstores
= nunits
/ group_size
;
6098 ltype
= build_nonstandard_integer_type (lsize
, 1);
6099 lvectype
= build_vector_type (ltype
, nstores
);
6101 /* Else fall back to vector extraction anyway.
6102 Fewer stores are more important than avoiding spilling
6103 of the vector we extract from. Compared to the
6104 construction case in vectorizable_load no store-forwarding
6105 issue exists here for reasonable archs. */
6108 else if (group_size
>= nunits
6109 && group_size
% nunits
== 0)
6116 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6117 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6120 ivstep
= stride_step
;
6121 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6122 build_int_cst (TREE_TYPE (ivstep
), vf
));
6124 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6126 create_iv (stride_base
, ivstep
, NULL
,
6127 loop
, &incr_gsi
, insert_after
,
6129 incr
= gsi_stmt (incr_gsi
);
6130 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6132 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6134 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6136 prev_stmt_info
= NULL
;
6137 alias_off
= build_int_cst (ref_type
, 0);
6138 next_stmt
= first_stmt
;
6139 for (g
= 0; g
< group_size
; g
++)
6141 running_off
= offvar
;
6144 tree size
= TYPE_SIZE_UNIT (ltype
);
6145 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6147 tree newoff
= copy_ssa_name (running_off
, NULL
);
6148 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6150 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6151 running_off
= newoff
;
6153 unsigned int group_el
= 0;
6154 unsigned HOST_WIDE_INT
6155 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6156 for (j
= 0; j
< ncopies
; j
++)
6158 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6159 and first_stmt == stmt. */
6164 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6166 vec_oprnd
= vec_oprnds
[0];
6170 gcc_assert (gimple_assign_single_p (next_stmt
));
6171 op
= gimple_assign_rhs1 (next_stmt
);
6172 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6178 vec_oprnd
= vec_oprnds
[j
];
6181 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6182 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6185 /* Pun the vector to extract from if necessary. */
6186 if (lvectype
!= vectype
)
6188 tree tem
= make_ssa_name (lvectype
);
6190 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6191 lvectype
, vec_oprnd
));
6192 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6195 for (i
= 0; i
< nstores
; i
++)
6197 tree newref
, newoff
;
6198 gimple
*incr
, *assign
;
6199 tree size
= TYPE_SIZE (ltype
);
6200 /* Extract the i'th component. */
6201 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6202 bitsize_int (i
), size
);
6203 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6206 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6210 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6212 newref
= build2 (MEM_REF
, ltype
,
6213 running_off
, this_off
);
6215 /* And store it to *running_off. */
6216 assign
= gimple_build_assign (newref
, elem
);
6217 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6221 || group_el
== group_size
)
6223 newoff
= copy_ssa_name (running_off
, NULL
);
6224 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6225 running_off
, stride_step
);
6226 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6228 running_off
= newoff
;
6231 if (g
== group_size
- 1
6234 if (j
== 0 && i
== 0)
6235 STMT_VINFO_VEC_STMT (stmt_info
)
6236 = *vec_stmt
= assign
;
6238 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6239 prev_stmt_info
= vinfo_for_stmt (assign
);
6243 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6248 vec_oprnds
.release ();
6252 auto_vec
<tree
> dr_chain (group_size
);
6253 oprnds
.create (group_size
);
6255 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6256 gcc_assert (alignment_support_scheme
);
6257 /* Targets with store-lane instructions must not require explicit
6259 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6260 || alignment_support_scheme
== dr_aligned
6261 || alignment_support_scheme
== dr_unaligned_supported
);
6263 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6264 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6265 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6267 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6268 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6270 aggr_type
= vectype
;
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt - i.e - we need to "unroll" the
6275 vector stmt by a factor VF/nunits. For more details see documentation in
6276 vect_get_vec_def_for_copy_stmt. */
6278 /* In case of interleaving (non-unit grouped access):
6285 We create vectorized stores starting from base address (the access of the
6286 first stmt in the chain (S2 in the above example), when the last store stmt
6287 of the chain (S4) is reached:
6290 VS2: &base + vec_size*1 = vx0
6291 VS3: &base + vec_size*2 = vx1
6292 VS4: &base + vec_size*3 = vx3
6294 Then permutation statements are generated:
6296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6301 (the order of the data-refs in the output of vect_permute_store_chain
6302 corresponds to the order of scalar stmts in the interleaving chain - see
6303 the documentation of vect_permute_store_chain()).
6305 In case of both multiple types and interleaving, above vector stores and
6306 permutation stmts are created for every copy. The result vector stmts are
6307 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6308 STMT_VINFO_RELATED_STMT for the next copies.
6311 prev_stmt_info
= NULL
;
6312 for (j
= 0; j
< ncopies
; j
++)
6319 /* Get vectorized arguments for SLP_NODE. */
6320 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6323 vec_oprnd
= vec_oprnds
[0];
6327 /* For interleaved stores we collect vectorized defs for all the
6328 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6329 used as an input to vect_permute_store_chain(), and OPRNDS as
6330 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6332 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6333 OPRNDS are of size 1. */
6334 next_stmt
= first_stmt
;
6335 for (i
= 0; i
< group_size
; i
++)
6337 /* Since gaps are not supported for interleaved stores,
6338 GROUP_SIZE is the exact number of stmts in the chain.
6339 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6340 there is no interleaving, GROUP_SIZE is 1, and only one
6341 iteration of the loop will be executed. */
6342 gcc_assert (next_stmt
6343 && gimple_assign_single_p (next_stmt
));
6344 op
= gimple_assign_rhs1 (next_stmt
);
6346 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6347 dr_chain
.quick_push (vec_oprnd
);
6348 oprnds
.quick_push (vec_oprnd
);
6349 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6353 /* We should have catched mismatched types earlier. */
6354 gcc_assert (useless_type_conversion_p (vectype
,
6355 TREE_TYPE (vec_oprnd
)));
6356 bool simd_lane_access_p
6357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6358 if (simd_lane_access_p
6359 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6361 && integer_zerop (DR_OFFSET (first_dr
))
6362 && integer_zerop (DR_INIT (first_dr
))
6363 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6364 get_alias_set (TREE_TYPE (ref_type
))))
6366 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6367 dataref_offset
= build_int_cst (ref_type
, 0);
6372 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6373 simd_lane_access_p
? loop
: NULL
,
6374 offset
, &dummy
, gsi
, &ptr_incr
,
6375 simd_lane_access_p
, &inv_p
);
6376 gcc_assert (bb_vinfo
|| !inv_p
);
6380 /* For interleaved stores we created vectorized defs for all the
6381 defs stored in OPRNDS in the previous iteration (previous copy).
6382 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6383 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6385 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6386 OPRNDS are of size 1. */
6387 for (i
= 0; i
< group_size
; i
++)
6390 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6391 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6392 dr_chain
[i
] = vec_oprnd
;
6393 oprnds
[i
] = vec_oprnd
;
6397 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6398 TYPE_SIZE_UNIT (aggr_type
));
6400 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6401 TYPE_SIZE_UNIT (aggr_type
));
6404 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6408 /* Combine all the vectors into an array. */
6409 vec_array
= create_vector_array (vectype
, vec_num
);
6410 for (i
= 0; i
< vec_num
; i
++)
6412 vec_oprnd
= dr_chain
[i
];
6413 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6417 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6418 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6419 gcall
*call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6421 gimple_call_set_lhs (call
, data_ref
);
6422 gimple_call_set_nothrow (call
, true);
6424 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6432 result_chain
.create (group_size
);
6434 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6438 next_stmt
= first_stmt
;
6439 for (i
= 0; i
< vec_num
; i
++)
6441 unsigned align
, misalign
;
6444 /* Bump the vector pointer. */
6445 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6449 vec_oprnd
= vec_oprnds
[i
];
6450 else if (grouped_store
)
6451 /* For grouped stores vectorized defs are interleaved in
6452 vect_permute_store_chain(). */
6453 vec_oprnd
= result_chain
[i
];
6455 data_ref
= fold_build2 (MEM_REF
, vectype
,
6459 : build_int_cst (ref_type
, 0));
6460 align
= DR_TARGET_ALIGNMENT (first_dr
);
6461 if (aligned_access_p (first_dr
))
6463 else if (DR_MISALIGNMENT (first_dr
) == -1)
6465 align
= dr_alignment (vect_dr_behavior (first_dr
));
6467 TREE_TYPE (data_ref
)
6468 = build_aligned_type (TREE_TYPE (data_ref
),
6469 align
* BITS_PER_UNIT
);
6473 TREE_TYPE (data_ref
)
6474 = build_aligned_type (TREE_TYPE (data_ref
),
6475 TYPE_ALIGN (elem_type
));
6476 misalign
= DR_MISALIGNMENT (first_dr
);
6478 if (dataref_offset
== NULL_TREE
6479 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6480 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6483 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6485 tree perm_mask
= perm_mask_for_reverse (vectype
);
6487 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6489 tree new_temp
= make_ssa_name (perm_dest
);
6491 /* Generate the permute statement. */
6493 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6494 vec_oprnd
, perm_mask
);
6495 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6497 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6498 vec_oprnd
= new_temp
;
6501 /* Arguments are ready. Create the new vector stmt. */
6502 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6503 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6508 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6516 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6519 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6524 result_chain
.release ();
6525 vec_oprnds
.release ();
6530 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6531 VECTOR_CST mask. No checks are made that the target platform supports the
6532 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6533 vect_gen_perm_mask_checked. */
6536 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
6538 tree mask_elt_type
, mask_type
;
6540 mask_elt_type
= lang_hooks
.types
.type_for_mode
6541 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
6542 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6543 return vec_perm_indices_to_tree (mask_type
, sel
);
6546 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6547 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6550 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
6552 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
6553 return vect_gen_perm_mask_any (vectype
, sel
);
6556 /* Given a vector variable X and Y, that was generated for the scalar
6557 STMT, generate instructions to permute the vector elements of X and Y
6558 using permutation mask MASK_VEC, insert them at *GSI and return the
6559 permuted vector variable. */
6562 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6563 gimple_stmt_iterator
*gsi
)
6565 tree vectype
= TREE_TYPE (x
);
6566 tree perm_dest
, data_ref
;
6569 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6570 data_ref
= make_ssa_name (perm_dest
);
6572 /* Generate the permute statement. */
6573 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6574 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6579 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6580 inserting them on the loops preheader edge. Returns true if we
6581 were successful in doing so (and thus STMT can be moved then),
6582 otherwise returns false. */
6585 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6591 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6593 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6594 if (!gimple_nop_p (def_stmt
)
6595 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6597 /* Make sure we don't need to recurse. While we could do
6598 so in simple cases when there are more complex use webs
6599 we don't have an easy way to preserve stmt order to fulfil
6600 dependencies within them. */
6603 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6605 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6607 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6608 if (!gimple_nop_p (def_stmt2
)
6609 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6619 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6621 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6622 if (!gimple_nop_p (def_stmt
)
6623 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6625 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6626 gsi_remove (&gsi
, false);
6627 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6634 /* vectorizable_load.
6636 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6643 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6644 slp_tree slp_node
, slp_instance slp_node_instance
)
6647 tree vec_dest
= NULL
;
6648 tree data_ref
= NULL
;
6649 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6650 stmt_vec_info prev_stmt_info
;
6651 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6652 struct loop
*loop
= NULL
;
6653 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6654 bool nested_in_vect_loop
= false;
6655 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6659 gimple
*new_stmt
= NULL
;
6661 enum dr_alignment_support alignment_support_scheme
;
6662 tree dataref_ptr
= NULL_TREE
;
6663 tree dataref_offset
= NULL_TREE
;
6664 gimple
*ptr_incr
= NULL
;
6666 int i
, j
, group_size
, group_gap_adj
;
6667 tree msq
= NULL_TREE
, lsq
;
6668 tree offset
= NULL_TREE
;
6669 tree byte_offset
= NULL_TREE
;
6670 tree realignment_token
= NULL_TREE
;
6672 vec
<tree
> dr_chain
= vNULL
;
6673 bool grouped_load
= false;
6675 gimple
*first_stmt_for_drptr
= NULL
;
6677 bool compute_in_loop
= false;
6678 struct loop
*at_loop
;
6680 bool slp
= (slp_node
!= NULL
);
6681 bool slp_perm
= false;
6682 enum tree_code code
;
6683 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6686 gather_scatter_info gs_info
;
6687 vec_info
*vinfo
= stmt_info
->vinfo
;
6690 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6693 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6697 /* Is vectorizable load? */
6698 if (!is_gimple_assign (stmt
))
6701 scalar_dest
= gimple_assign_lhs (stmt
);
6702 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6705 code
= gimple_assign_rhs_code (stmt
);
6706 if (code
!= ARRAY_REF
6707 && code
!= BIT_FIELD_REF
6708 && code
!= INDIRECT_REF
6709 && code
!= COMPONENT_REF
6710 && code
!= IMAGPART_EXPR
6711 && code
!= REALPART_EXPR
6713 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6716 if (!STMT_VINFO_DATA_REF (stmt_info
))
6719 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6720 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6724 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6725 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6726 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6731 /* Multiple types in SLP are handled by creating the appropriate number of
6732 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6737 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6739 gcc_assert (ncopies
>= 1);
6741 /* FORNOW. This restriction should be relaxed. */
6742 if (nested_in_vect_loop
&& ncopies
> 1)
6744 if (dump_enabled_p ())
6745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6746 "multiple types in nested loop.\n");
6750 /* Invalidate assumptions made by dependence analysis when vectorization
6751 on the unrolled body effectively re-orders stmts. */
6753 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6754 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6755 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6759 "cannot perform implicit CSE when unrolling "
6760 "with negative dependence distance\n");
6764 elem_type
= TREE_TYPE (vectype
);
6765 mode
= TYPE_MODE (vectype
);
6767 /* FORNOW. In some cases can vectorize even if data-type not supported
6768 (e.g. - data copies). */
6769 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6771 if (dump_enabled_p ())
6772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6773 "Aligned load, but unsupported type.\n");
6777 /* Check if the load is a part of an interleaving chain. */
6778 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6780 grouped_load
= true;
6782 gcc_assert (!nested_in_vect_loop
);
6783 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6785 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6786 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6788 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6791 /* Invalidate assumptions made by dependence analysis when vectorization
6792 on the unrolled body effectively re-orders stmts. */
6793 if (!PURE_SLP_STMT (stmt_info
)
6794 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6795 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6796 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6798 if (dump_enabled_p ())
6799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6800 "cannot perform implicit CSE when performing "
6801 "group loads with negative dependence distance\n");
6805 /* Similarly when the stmt is a load that is both part of a SLP
6806 instance and a loop vectorized stmt via the same-dr mechanism
6807 we have to give up. */
6808 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6809 && (STMT_SLP_TYPE (stmt_info
)
6810 != STMT_SLP_TYPE (vinfo_for_stmt
6811 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6813 if (dump_enabled_p ())
6814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6815 "conflicting SLP types for CSEd load\n");
6820 vect_memory_access_type memory_access_type
;
6821 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6822 &memory_access_type
, &gs_info
))
6825 if (!vec_stmt
) /* transformation not required. */
6828 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6829 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6830 /* The SLP costs are calculated during SLP analysis. */
6831 if (!PURE_SLP_STMT (stmt_info
))
6832 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6838 gcc_assert (memory_access_type
6839 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6841 if (dump_enabled_p ())
6842 dump_printf_loc (MSG_NOTE
, vect_location
,
6843 "transform load. ncopies = %d\n", ncopies
);
6847 ensure_base_align (dr
);
6849 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6851 tree vec_oprnd0
= NULL_TREE
, op
;
6852 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6853 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6854 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6855 edge pe
= loop_preheader_edge (loop
);
6858 enum { NARROW
, NONE
, WIDEN
} modifier
;
6859 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6861 if (nunits
== gather_off_nunits
)
6863 else if (nunits
== gather_off_nunits
/ 2)
6867 vec_perm_builder
sel (gather_off_nunits
, gather_off_nunits
, 1);
6868 for (i
= 0; i
< gather_off_nunits
; ++i
)
6869 sel
.quick_push (i
| nunits
);
6871 vec_perm_indices
indices (sel
, 1, gather_off_nunits
);
6872 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6875 else if (nunits
== gather_off_nunits
* 2)
6879 vec_perm_builder
sel (nunits
, nunits
, 1);
6880 for (i
= 0; i
< nunits
; ++i
)
6881 sel
.quick_push (i
< gather_off_nunits
6882 ? i
: i
+ nunits
- gather_off_nunits
);
6884 vec_perm_indices
indices (sel
, 2, nunits
);
6885 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6891 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6892 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6893 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6894 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6895 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6896 scaletype
= TREE_VALUE (arglist
);
6897 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6899 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6901 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6902 if (!is_gimple_min_invariant (ptr
))
6904 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6905 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6906 gcc_assert (!new_bb
);
6909 /* Currently we support only unconditional gather loads,
6910 so mask should be all ones. */
6911 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6912 mask
= build_int_cst (masktype
, -1);
6913 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6915 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6916 mask
= build_vector_from_val (masktype
, mask
);
6917 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6919 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6923 for (j
= 0; j
< 6; ++j
)
6925 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6926 mask
= build_real (TREE_TYPE (masktype
), r
);
6927 mask
= build_vector_from_val (masktype
, mask
);
6928 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6933 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6935 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6936 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6937 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6941 for (j
= 0; j
< 6; ++j
)
6943 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6944 merge
= build_real (TREE_TYPE (rettype
), r
);
6948 merge
= build_vector_from_val (rettype
, merge
);
6949 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6951 prev_stmt_info
= NULL
;
6952 for (j
= 0; j
< ncopies
; ++j
)
6954 if (modifier
== WIDEN
&& (j
& 1))
6955 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6956 perm_mask
, stmt
, gsi
);
6959 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6962 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6964 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6967 == TYPE_VECTOR_SUBPARTS (idxtype
));
6968 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6969 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6971 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6972 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6977 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6979 if (!useless_type_conversion_p (vectype
, rettype
))
6981 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6982 == TYPE_VECTOR_SUBPARTS (rettype
));
6983 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6984 gimple_call_set_lhs (new_stmt
, op
);
6985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6986 var
= make_ssa_name (vec_dest
);
6987 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6989 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6993 var
= make_ssa_name (vec_dest
, new_stmt
);
6994 gimple_call_set_lhs (new_stmt
, var
);
6997 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6999 if (modifier
== NARROW
)
7006 var
= permute_vec_elements (prev_res
, var
,
7007 perm_mask
, stmt
, gsi
);
7008 new_stmt
= SSA_NAME_DEF_STMT (var
);
7011 if (prev_stmt_info
== NULL
)
7012 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7014 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7015 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7020 if (memory_access_type
== VMAT_ELEMENTWISE
7021 || memory_access_type
== VMAT_STRIDED_SLP
)
7023 gimple_stmt_iterator incr_gsi
;
7029 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7030 gimple_seq stmts
= NULL
;
7031 tree stride_base
, stride_step
, alias_off
;
7033 gcc_assert (!nested_in_vect_loop
);
7035 if (slp
&& grouped_load
)
7037 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7038 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7039 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7040 ref_type
= get_group_alias_ptr_type (first_stmt
);
7047 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7051 = fold_build_pointer_plus
7052 (DR_BASE_ADDRESS (first_dr
),
7053 size_binop (PLUS_EXPR
,
7054 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7055 convert_to_ptrofftype (DR_INIT (first_dr
))));
7056 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7058 /* For a load with loop-invariant (but other than power-of-2)
7059 stride (i.e. not a grouped access) like so:
7061 for (i = 0; i < n; i += stride)
7064 we generate a new induction variable and new accesses to
7065 form a new vector (or vectors, depending on ncopies):
7067 for (j = 0; ; j += VF*stride)
7069 tmp2 = array[j + stride];
7071 vectemp = {tmp1, tmp2, ...}
7074 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7075 build_int_cst (TREE_TYPE (stride_step
), vf
));
7077 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7079 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7080 loop
, &incr_gsi
, insert_after
,
7082 incr
= gsi_stmt (incr_gsi
);
7083 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7085 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7086 &stmts
, true, NULL_TREE
);
7088 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7090 prev_stmt_info
= NULL
;
7091 running_off
= offvar
;
7092 alias_off
= build_int_cst (ref_type
, 0);
7093 int nloads
= nunits
;
7095 tree ltype
= TREE_TYPE (vectype
);
7096 tree lvectype
= vectype
;
7097 auto_vec
<tree
> dr_chain
;
7098 if (memory_access_type
== VMAT_STRIDED_SLP
)
7100 if (group_size
< nunits
)
7102 /* First check if vec_init optab supports construction from
7103 vector elts directly. */
7104 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7106 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7107 && VECTOR_MODE_P (vmode
)
7108 && (convert_optab_handler (vec_init_optab
,
7109 TYPE_MODE (vectype
), vmode
)
7110 != CODE_FOR_nothing
))
7112 nloads
= nunits
/ group_size
;
7114 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7118 /* Otherwise avoid emitting a constructor of vector elements
7119 by performing the loads using an integer type of the same
7120 size, constructing a vector of those and then
7121 re-interpreting it as the original vector type.
7122 This avoids a huge runtime penalty due to the general
7123 inability to perform store forwarding from smaller stores
7124 to a larger load. */
7126 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7127 elmode
= int_mode_for_size (lsize
, 0).require ();
7128 /* If we can't construct such a vector fall back to
7129 element loads of the original vector type. */
7130 if (mode_for_vector (elmode
,
7131 nunits
/ group_size
).exists (&vmode
)
7132 && VECTOR_MODE_P (vmode
)
7133 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7134 != CODE_FOR_nothing
))
7136 nloads
= nunits
/ group_size
;
7138 ltype
= build_nonstandard_integer_type (lsize
, 1);
7139 lvectype
= build_vector_type (ltype
, nloads
);
7149 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7153 /* For SLP permutation support we need to load the whole group,
7154 not only the number of vector stmts the permutation result
7158 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7159 dr_chain
.create (ncopies
);
7162 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7165 unsigned HOST_WIDE_INT
7166 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7167 for (j
= 0; j
< ncopies
; j
++)
7170 vec_alloc (v
, nloads
);
7171 for (i
= 0; i
< nloads
; i
++)
7173 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7175 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7176 build2 (MEM_REF
, ltype
,
7177 running_off
, this_off
));
7178 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7180 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7181 gimple_assign_lhs (new_stmt
));
7185 || group_el
== group_size
)
7187 tree newoff
= copy_ssa_name (running_off
);
7188 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7189 running_off
, stride_step
);
7190 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7192 running_off
= newoff
;
7198 tree vec_inv
= build_constructor (lvectype
, v
);
7199 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7200 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7201 if (lvectype
!= vectype
)
7203 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7205 build1 (VIEW_CONVERT_EXPR
,
7206 vectype
, new_temp
));
7207 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7214 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7216 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7221 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7223 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7224 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7230 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7231 slp_node_instance
, false, &n_perms
);
7238 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7239 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7240 /* For SLP vectorization we directly vectorize a subchain
7241 without permutation. */
7242 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7243 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7244 /* For BB vectorization always use the first stmt to base
7245 the data ref pointer on. */
7247 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7249 /* Check if the chain of loads is already vectorized. */
7250 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7251 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7252 ??? But we can only do so if there is exactly one
7253 as we have no way to get at the rest. Leave the CSE
7255 ??? With the group load eventually participating
7256 in multiple different permutations (having multiple
7257 slp nodes which refer to the same group) the CSE
7258 is even wrong code. See PR56270. */
7261 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7264 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7267 /* VEC_NUM is the number of vect stmts to be created for this group. */
7270 grouped_load
= false;
7271 /* For SLP permutation support we need to load the whole group,
7272 not only the number of vector stmts the permutation result
7276 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7277 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7281 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7283 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7287 vec_num
= group_size
;
7289 ref_type
= get_group_alias_ptr_type (first_stmt
);
7295 group_size
= vec_num
= 1;
7297 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7300 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7301 gcc_assert (alignment_support_scheme
);
7302 /* Targets with load-lane instructions must not require explicit
7304 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7305 || alignment_support_scheme
== dr_aligned
7306 || alignment_support_scheme
== dr_unaligned_supported
);
7308 /* In case the vectorization factor (VF) is bigger than the number
7309 of elements that we can fit in a vectype (nunits), we have to generate
7310 more than one vector stmt - i.e - we need to "unroll" the
7311 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7312 from one copy of the vector stmt to the next, in the field
7313 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7314 stages to find the correct vector defs to be used when vectorizing
7315 stmts that use the defs of the current stmt. The example below
7316 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7317 need to create 4 vectorized stmts):
7319 before vectorization:
7320 RELATED_STMT VEC_STMT
7324 step 1: vectorize stmt S1:
7325 We first create the vector stmt VS1_0, and, as usual, record a
7326 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7327 Next, we create the vector stmt VS1_1, and record a pointer to
7328 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7329 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7331 RELATED_STMT VEC_STMT
7332 VS1_0: vx0 = memref0 VS1_1 -
7333 VS1_1: vx1 = memref1 VS1_2 -
7334 VS1_2: vx2 = memref2 VS1_3 -
7335 VS1_3: vx3 = memref3 - -
7336 S1: x = load - VS1_0
7339 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7340 information we recorded in RELATED_STMT field is used to vectorize
7343 /* In case of interleaving (non-unit grouped access):
7350 Vectorized loads are created in the order of memory accesses
7351 starting from the access of the first stmt of the chain:
7354 VS2: vx1 = &base + vec_size*1
7355 VS3: vx3 = &base + vec_size*2
7356 VS4: vx4 = &base + vec_size*3
7358 Then permutation statements are generated:
7360 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7361 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7364 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7365 (the order of the data-refs in the output of vect_permute_load_chain
7366 corresponds to the order of scalar stmts in the interleaving chain - see
7367 the documentation of vect_permute_load_chain()).
7368 The generation of permutation stmts and recording them in
7369 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7371 In case of both multiple types and interleaving, the vector loads and
7372 permutation stmts above are created for every copy. The result vector
7373 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7374 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7376 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7377 on a target that supports unaligned accesses (dr_unaligned_supported)
7378 we generate the following code:
7382 p = p + indx * vectype_size;
7387 Otherwise, the data reference is potentially unaligned on a target that
7388 does not support unaligned accesses (dr_explicit_realign_optimized) -
7389 then generate the following code, in which the data in each iteration is
7390 obtained by two vector loads, one from the previous iteration, and one
7391 from the current iteration:
7393 msq_init = *(floor(p1))
7394 p2 = initial_addr + VS - 1;
7395 realignment_token = call target_builtin;
7398 p2 = p2 + indx * vectype_size
7400 vec_dest = realign_load (msq, lsq, realignment_token)
7405 /* If the misalignment remains the same throughout the execution of the
7406 loop, we can create the init_addr and permutation mask at the loop
7407 preheader. Otherwise, it needs to be created inside the loop.
7408 This can only occur when vectorizing memory accesses in the inner-loop
7409 nested within an outer-loop that is being vectorized. */
7411 if (nested_in_vect_loop
7412 && (DR_STEP_ALIGNMENT (dr
) % GET_MODE_SIZE (TYPE_MODE (vectype
))) != 0)
7414 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7415 compute_in_loop
= true;
7418 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7419 || alignment_support_scheme
== dr_explicit_realign
)
7420 && !compute_in_loop
)
7422 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7423 alignment_support_scheme
, NULL_TREE
,
7425 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7427 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7428 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7435 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7436 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7438 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7439 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7441 aggr_type
= vectype
;
7443 prev_stmt_info
= NULL
;
7445 for (j
= 0; j
< ncopies
; j
++)
7447 /* 1. Create the vector or array pointer update chain. */
7450 bool simd_lane_access_p
7451 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7452 if (simd_lane_access_p
7453 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7454 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7455 && integer_zerop (DR_OFFSET (first_dr
))
7456 && integer_zerop (DR_INIT (first_dr
))
7457 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7458 get_alias_set (TREE_TYPE (ref_type
)))
7459 && (alignment_support_scheme
== dr_aligned
7460 || alignment_support_scheme
== dr_unaligned_supported
))
7462 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7463 dataref_offset
= build_int_cst (ref_type
, 0);
7466 else if (first_stmt_for_drptr
7467 && first_stmt
!= first_stmt_for_drptr
)
7470 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7471 at_loop
, offset
, &dummy
, gsi
,
7472 &ptr_incr
, simd_lane_access_p
,
7473 &inv_p
, byte_offset
);
7474 /* Adjust the pointer by the difference to first_stmt. */
7475 data_reference_p ptrdr
7476 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7477 tree diff
= fold_convert (sizetype
,
7478 size_binop (MINUS_EXPR
,
7481 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7486 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7487 offset
, &dummy
, gsi
, &ptr_incr
,
7488 simd_lane_access_p
, &inv_p
,
7491 else if (dataref_offset
)
7492 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7493 TYPE_SIZE_UNIT (aggr_type
));
7495 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7496 TYPE_SIZE_UNIT (aggr_type
));
7498 if (grouped_load
|| slp_perm
)
7499 dr_chain
.create (vec_num
);
7501 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7505 vec_array
= create_vector_array (vectype
, vec_num
);
7508 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7509 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7510 gcall
*call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1,
7512 gimple_call_set_lhs (call
, vec_array
);
7513 gimple_call_set_nothrow (call
, true);
7515 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7517 /* Extract each vector into an SSA_NAME. */
7518 for (i
= 0; i
< vec_num
; i
++)
7520 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7522 dr_chain
.quick_push (new_temp
);
7525 /* Record the mapping between SSA_NAMEs and statements. */
7526 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7530 for (i
= 0; i
< vec_num
; i
++)
7533 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7536 /* 2. Create the vector-load in the loop. */
7537 switch (alignment_support_scheme
)
7540 case dr_unaligned_supported
:
7542 unsigned int align
, misalign
;
7545 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7548 : build_int_cst (ref_type
, 0));
7549 align
= DR_TARGET_ALIGNMENT (dr
);
7550 if (alignment_support_scheme
== dr_aligned
)
7552 gcc_assert (aligned_access_p (first_dr
));
7555 else if (DR_MISALIGNMENT (first_dr
) == -1)
7557 align
= dr_alignment (vect_dr_behavior (first_dr
));
7559 TREE_TYPE (data_ref
)
7560 = build_aligned_type (TREE_TYPE (data_ref
),
7561 align
* BITS_PER_UNIT
);
7565 TREE_TYPE (data_ref
)
7566 = build_aligned_type (TREE_TYPE (data_ref
),
7567 TYPE_ALIGN (elem_type
));
7568 misalign
= DR_MISALIGNMENT (first_dr
);
7570 if (dataref_offset
== NULL_TREE
7571 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7572 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7576 case dr_explicit_realign
:
7580 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7582 if (compute_in_loop
)
7583 msq
= vect_setup_realignment (first_stmt
, gsi
,
7585 dr_explicit_realign
,
7588 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7589 ptr
= copy_ssa_name (dataref_ptr
);
7591 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7592 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7593 new_stmt
= gimple_build_assign
7594 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7596 (TREE_TYPE (dataref_ptr
),
7597 -(HOST_WIDE_INT
) align
));
7598 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7600 = build2 (MEM_REF
, vectype
, ptr
,
7601 build_int_cst (ref_type
, 0));
7602 vec_dest
= vect_create_destination_var (scalar_dest
,
7604 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7605 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7606 gimple_assign_set_lhs (new_stmt
, new_temp
);
7607 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7608 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7609 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7612 bump
= size_binop (MULT_EXPR
, vs
,
7613 TYPE_SIZE_UNIT (elem_type
));
7614 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7615 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7616 new_stmt
= gimple_build_assign
7617 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7619 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7620 ptr
= copy_ssa_name (ptr
, new_stmt
);
7621 gimple_assign_set_lhs (new_stmt
, ptr
);
7622 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7624 = build2 (MEM_REF
, vectype
, ptr
,
7625 build_int_cst (ref_type
, 0));
7628 case dr_explicit_realign_optimized
:
7630 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7631 new_temp
= copy_ssa_name (dataref_ptr
);
7633 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7634 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7635 new_stmt
= gimple_build_assign
7636 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7637 build_int_cst (TREE_TYPE (dataref_ptr
),
7638 -(HOST_WIDE_INT
) align
));
7639 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7641 = build2 (MEM_REF
, vectype
, new_temp
,
7642 build_int_cst (ref_type
, 0));
7648 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7649 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7650 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7651 gimple_assign_set_lhs (new_stmt
, new_temp
);
7652 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7654 /* 3. Handle explicit realignment if necessary/supported.
7656 vec_dest = realign_load (msq, lsq, realignment_token) */
7657 if (alignment_support_scheme
== dr_explicit_realign_optimized
7658 || alignment_support_scheme
== dr_explicit_realign
)
7660 lsq
= gimple_assign_lhs (new_stmt
);
7661 if (!realignment_token
)
7662 realignment_token
= dataref_ptr
;
7663 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7664 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7665 msq
, lsq
, realignment_token
);
7666 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7667 gimple_assign_set_lhs (new_stmt
, new_temp
);
7668 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7670 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7673 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7674 add_phi_arg (phi
, lsq
,
7675 loop_latch_edge (containing_loop
),
7681 /* 4. Handle invariant-load. */
7682 if (inv_p
&& !bb_vinfo
)
7684 gcc_assert (!grouped_load
);
7685 /* If we have versioned for aliasing or the loop doesn't
7686 have any data dependencies that would preclude this,
7687 then we are sure this is a loop invariant load and
7688 thus we can insert it on the preheader edge. */
7689 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7690 && !nested_in_vect_loop
7691 && hoist_defs_of_uses (stmt
, loop
))
7693 if (dump_enabled_p ())
7695 dump_printf_loc (MSG_NOTE
, vect_location
,
7696 "hoisting out of the vectorized "
7698 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7700 tree tem
= copy_ssa_name (scalar_dest
);
7701 gsi_insert_on_edge_immediate
7702 (loop_preheader_edge (loop
),
7703 gimple_build_assign (tem
,
7705 (gimple_assign_rhs1 (stmt
))));
7706 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7707 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7708 set_vinfo_for_stmt (new_stmt
,
7709 new_stmt_vec_info (new_stmt
, vinfo
));
7713 gimple_stmt_iterator gsi2
= *gsi
;
7715 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7717 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7721 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7723 tree perm_mask
= perm_mask_for_reverse (vectype
);
7724 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7725 perm_mask
, stmt
, gsi
);
7726 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7729 /* Collect vector loads and later create their permutation in
7730 vect_transform_grouped_load (). */
7731 if (grouped_load
|| slp_perm
)
7732 dr_chain
.quick_push (new_temp
);
7734 /* Store vector loads in the corresponding SLP_NODE. */
7735 if (slp
&& !slp_perm
)
7736 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7738 /* With SLP permutation we load the gaps as well, without
7739 we need to skip the gaps after we manage to fully load
7740 all elements. group_gap_adj is GROUP_SIZE here. */
7741 group_elt
+= nunits
;
7742 if (group_gap_adj
!= 0 && ! slp_perm
7743 && group_elt
== group_size
- group_gap_adj
)
7745 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7747 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7748 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7753 /* Bump the vector pointer to account for a gap or for excess
7754 elements loaded for a permuted SLP load. */
7755 if (group_gap_adj
!= 0 && slp_perm
)
7757 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7759 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7760 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7765 if (slp
&& !slp_perm
)
7771 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7772 slp_node_instance
, false,
7775 dr_chain
.release ();
7783 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7784 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7785 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7790 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7792 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7793 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7796 dr_chain
.release ();
7802 /* Function vect_is_simple_cond.
7805 LOOP - the loop that is being vectorized.
7806 COND - Condition that is checked for simple use.
7809 *COMP_VECTYPE - the vector type for the comparison.
7810 *DTS - The def types for the arguments of the comparison
7812 Returns whether a COND can be vectorized. Checks whether
7813 condition operands are supportable using vec_is_simple_use. */
7816 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
7817 tree
*comp_vectype
, enum vect_def_type
*dts
,
7821 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7824 if (TREE_CODE (cond
) == SSA_NAME
7825 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7827 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7828 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7829 &dts
[0], comp_vectype
)
7831 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7836 if (!COMPARISON_CLASS_P (cond
))
7839 lhs
= TREE_OPERAND (cond
, 0);
7840 rhs
= TREE_OPERAND (cond
, 1);
7842 if (TREE_CODE (lhs
) == SSA_NAME
)
7844 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7845 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
7848 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
7849 || TREE_CODE (lhs
) == FIXED_CST
)
7850 dts
[0] = vect_constant_def
;
7854 if (TREE_CODE (rhs
) == SSA_NAME
)
7856 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7857 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
7860 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
7861 || TREE_CODE (rhs
) == FIXED_CST
)
7862 dts
[1] = vect_constant_def
;
7866 if (vectype1
&& vectype2
7867 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7870 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7871 /* Invariant comparison. */
7872 if (! *comp_vectype
)
7874 tree scalar_type
= TREE_TYPE (lhs
);
7875 /* If we can widen the comparison to match vectype do so. */
7876 if (INTEGRAL_TYPE_P (scalar_type
)
7877 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
7878 TYPE_SIZE (TREE_TYPE (vectype
))))
7879 scalar_type
= build_nonstandard_integer_type
7880 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
7881 TYPE_UNSIGNED (scalar_type
));
7882 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
7888 /* vectorizable_condition.
7890 Check if STMT is conditional modify expression that can be vectorized.
7891 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7892 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7895 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7896 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7897 else clause if it is 2).
7899 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7902 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7903 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7906 tree scalar_dest
= NULL_TREE
;
7907 tree vec_dest
= NULL_TREE
;
7908 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7909 tree then_clause
, else_clause
;
7910 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7911 tree comp_vectype
= NULL_TREE
;
7912 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7913 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7916 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7917 enum vect_def_type dts
[4]
7918 = {vect_unknown_def_type
, vect_unknown_def_type
,
7919 vect_unknown_def_type
, vect_unknown_def_type
};
7922 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7923 stmt_vec_info prev_stmt_info
= NULL
;
7925 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7926 vec
<tree
> vec_oprnds0
= vNULL
;
7927 vec
<tree
> vec_oprnds1
= vNULL
;
7928 vec
<tree
> vec_oprnds2
= vNULL
;
7929 vec
<tree
> vec_oprnds3
= vNULL
;
7931 bool masked
= false;
7933 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7936 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7938 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7941 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7942 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7946 /* FORNOW: not yet supported. */
7947 if (STMT_VINFO_LIVE_P (stmt_info
))
7949 if (dump_enabled_p ())
7950 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7951 "value used after loop.\n");
7956 /* Is vectorizable conditional operation? */
7957 if (!is_gimple_assign (stmt
))
7960 code
= gimple_assign_rhs_code (stmt
);
7962 if (code
!= COND_EXPR
)
7965 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7966 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7971 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7973 gcc_assert (ncopies
>= 1);
7974 if (reduc_index
&& ncopies
> 1)
7975 return false; /* FORNOW */
7977 cond_expr
= gimple_assign_rhs1 (stmt
);
7978 then_clause
= gimple_assign_rhs2 (stmt
);
7979 else_clause
= gimple_assign_rhs3 (stmt
);
7981 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
7982 &comp_vectype
, &dts
[0], vectype
)
7987 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
7990 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
7994 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7997 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8000 masked
= !COMPARISON_CLASS_P (cond_expr
);
8001 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8003 if (vec_cmp_type
== NULL_TREE
)
8006 cond_code
= TREE_CODE (cond_expr
);
8009 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8010 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8013 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8015 /* Boolean values may have another representation in vectors
8016 and therefore we prefer bit operations over comparison for
8017 them (which also works for scalar masks). We store opcodes
8018 to use in bitop1 and bitop2. Statement is vectorized as
8019 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8020 depending on bitop1 and bitop2 arity. */
8024 bitop1
= BIT_NOT_EXPR
;
8025 bitop2
= BIT_AND_EXPR
;
8028 bitop1
= BIT_NOT_EXPR
;
8029 bitop2
= BIT_IOR_EXPR
;
8032 bitop1
= BIT_NOT_EXPR
;
8033 bitop2
= BIT_AND_EXPR
;
8034 std::swap (cond_expr0
, cond_expr1
);
8037 bitop1
= BIT_NOT_EXPR
;
8038 bitop2
= BIT_IOR_EXPR
;
8039 std::swap (cond_expr0
, cond_expr1
);
8042 bitop1
= BIT_XOR_EXPR
;
8045 bitop1
= BIT_XOR_EXPR
;
8046 bitop2
= BIT_NOT_EXPR
;
8051 cond_code
= SSA_NAME
;
8056 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8057 if (bitop1
!= NOP_EXPR
)
8059 machine_mode mode
= TYPE_MODE (comp_vectype
);
8062 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8063 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8066 if (bitop2
!= NOP_EXPR
)
8068 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8070 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8074 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8077 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
8087 vec_oprnds0
.create (1);
8088 vec_oprnds1
.create (1);
8089 vec_oprnds2
.create (1);
8090 vec_oprnds3
.create (1);
8094 scalar_dest
= gimple_assign_lhs (stmt
);
8095 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8097 /* Handle cond expr. */
8098 for (j
= 0; j
< ncopies
; j
++)
8100 gassign
*new_stmt
= NULL
;
8105 auto_vec
<tree
, 4> ops
;
8106 auto_vec
<vec
<tree
>, 4> vec_defs
;
8109 ops
.safe_push (cond_expr
);
8112 ops
.safe_push (cond_expr0
);
8113 ops
.safe_push (cond_expr1
);
8115 ops
.safe_push (then_clause
);
8116 ops
.safe_push (else_clause
);
8117 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8118 vec_oprnds3
= vec_defs
.pop ();
8119 vec_oprnds2
= vec_defs
.pop ();
8121 vec_oprnds1
= vec_defs
.pop ();
8122 vec_oprnds0
= vec_defs
.pop ();
8130 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8132 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8138 = vect_get_vec_def_for_operand (cond_expr0
,
8139 stmt
, comp_vectype
);
8140 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8143 = vect_get_vec_def_for_operand (cond_expr1
,
8144 stmt
, comp_vectype
);
8145 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8147 if (reduc_index
== 1)
8148 vec_then_clause
= reduc_def
;
8151 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8153 vect_is_simple_use (then_clause
, loop_vinfo
,
8156 if (reduc_index
== 2)
8157 vec_else_clause
= reduc_def
;
8160 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8162 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8169 = vect_get_vec_def_for_stmt_copy (dts
[0],
8170 vec_oprnds0
.pop ());
8173 = vect_get_vec_def_for_stmt_copy (dts
[1],
8174 vec_oprnds1
.pop ());
8176 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8177 vec_oprnds2
.pop ());
8178 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8179 vec_oprnds3
.pop ());
8184 vec_oprnds0
.quick_push (vec_cond_lhs
);
8186 vec_oprnds1
.quick_push (vec_cond_rhs
);
8187 vec_oprnds2
.quick_push (vec_then_clause
);
8188 vec_oprnds3
.quick_push (vec_else_clause
);
8191 /* Arguments are ready. Create the new vector stmt. */
8192 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8194 vec_then_clause
= vec_oprnds2
[i
];
8195 vec_else_clause
= vec_oprnds3
[i
];
8198 vec_compare
= vec_cond_lhs
;
8201 vec_cond_rhs
= vec_oprnds1
[i
];
8202 if (bitop1
== NOP_EXPR
)
8203 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8204 vec_cond_lhs
, vec_cond_rhs
);
8207 new_temp
= make_ssa_name (vec_cmp_type
);
8208 if (bitop1
== BIT_NOT_EXPR
)
8209 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8213 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8215 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8216 if (bitop2
== NOP_EXPR
)
8217 vec_compare
= new_temp
;
8218 else if (bitop2
== BIT_NOT_EXPR
)
8220 /* Instead of doing ~x ? y : z do x ? z : y. */
8221 vec_compare
= new_temp
;
8222 std::swap (vec_then_clause
, vec_else_clause
);
8226 vec_compare
= make_ssa_name (vec_cmp_type
);
8228 = gimple_build_assign (vec_compare
, bitop2
,
8229 vec_cond_lhs
, new_temp
);
8230 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8234 new_temp
= make_ssa_name (vec_dest
);
8235 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8236 vec_compare
, vec_then_clause
,
8238 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8240 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8247 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8249 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8251 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8254 vec_oprnds0
.release ();
8255 vec_oprnds1
.release ();
8256 vec_oprnds2
.release ();
8257 vec_oprnds3
.release ();
8262 /* vectorizable_comparison.
8264 Check if STMT is comparison expression that can be vectorized.
8265 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8266 comparison, put it in VEC_STMT, and insert it at GSI.
8268 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8271 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8272 gimple
**vec_stmt
, tree reduc_def
,
8275 tree lhs
, rhs1
, rhs2
;
8276 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8277 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8278 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8279 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8281 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8282 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8286 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8287 stmt_vec_info prev_stmt_info
= NULL
;
8289 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8290 vec
<tree
> vec_oprnds0
= vNULL
;
8291 vec
<tree
> vec_oprnds1
= vNULL
;
8296 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8299 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8302 mask_type
= vectype
;
8303 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8308 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8310 gcc_assert (ncopies
>= 1);
8311 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8312 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8316 if (STMT_VINFO_LIVE_P (stmt_info
))
8318 if (dump_enabled_p ())
8319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8320 "value used after loop.\n");
8324 if (!is_gimple_assign (stmt
))
8327 code
= gimple_assign_rhs_code (stmt
);
8329 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8332 rhs1
= gimple_assign_rhs1 (stmt
);
8333 rhs2
= gimple_assign_rhs2 (stmt
);
8335 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8336 &dts
[0], &vectype1
))
8339 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8340 &dts
[1], &vectype2
))
8343 if (vectype1
&& vectype2
8344 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
8347 vectype
= vectype1
? vectype1
: vectype2
;
8349 /* Invariant comparison. */
8352 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8353 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8356 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8359 /* Can't compare mask and non-mask types. */
8360 if (vectype1
&& vectype2
8361 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8364 /* Boolean values may have another representation in vectors
8365 and therefore we prefer bit operations over comparison for
8366 them (which also works for scalar masks). We store opcodes
8367 to use in bitop1 and bitop2. Statement is vectorized as
8368 BITOP2 (rhs1 BITOP1 rhs2) or
8369 rhs1 BITOP2 (BITOP1 rhs2)
8370 depending on bitop1 and bitop2 arity. */
8371 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
8373 if (code
== GT_EXPR
)
8375 bitop1
= BIT_NOT_EXPR
;
8376 bitop2
= BIT_AND_EXPR
;
8378 else if (code
== GE_EXPR
)
8380 bitop1
= BIT_NOT_EXPR
;
8381 bitop2
= BIT_IOR_EXPR
;
8383 else if (code
== LT_EXPR
)
8385 bitop1
= BIT_NOT_EXPR
;
8386 bitop2
= BIT_AND_EXPR
;
8387 std::swap (rhs1
, rhs2
);
8388 std::swap (dts
[0], dts
[1]);
8390 else if (code
== LE_EXPR
)
8392 bitop1
= BIT_NOT_EXPR
;
8393 bitop2
= BIT_IOR_EXPR
;
8394 std::swap (rhs1
, rhs2
);
8395 std::swap (dts
[0], dts
[1]);
8399 bitop1
= BIT_XOR_EXPR
;
8400 if (code
== EQ_EXPR
)
8401 bitop2
= BIT_NOT_EXPR
;
8407 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
8408 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
8409 dts
, ndts
, NULL
, NULL
);
8410 if (bitop1
== NOP_EXPR
)
8411 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
8414 machine_mode mode
= TYPE_MODE (vectype
);
8417 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
8418 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8421 if (bitop2
!= NOP_EXPR
)
8423 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8424 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8434 vec_oprnds0
.create (1);
8435 vec_oprnds1
.create (1);
8439 lhs
= gimple_assign_lhs (stmt
);
8440 mask
= vect_create_destination_var (lhs
, mask_type
);
8442 /* Handle cmp expr. */
8443 for (j
= 0; j
< ncopies
; j
++)
8445 gassign
*new_stmt
= NULL
;
8450 auto_vec
<tree
, 2> ops
;
8451 auto_vec
<vec
<tree
>, 2> vec_defs
;
8453 ops
.safe_push (rhs1
);
8454 ops
.safe_push (rhs2
);
8455 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8456 vec_oprnds1
= vec_defs
.pop ();
8457 vec_oprnds0
= vec_defs
.pop ();
8461 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8462 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8467 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8468 vec_oprnds0
.pop ());
8469 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8470 vec_oprnds1
.pop ());
8475 vec_oprnds0
.quick_push (vec_rhs1
);
8476 vec_oprnds1
.quick_push (vec_rhs2
);
8479 /* Arguments are ready. Create the new vector stmt. */
8480 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8482 vec_rhs2
= vec_oprnds1
[i
];
8484 new_temp
= make_ssa_name (mask
);
8485 if (bitop1
== NOP_EXPR
)
8487 new_stmt
= gimple_build_assign (new_temp
, code
,
8488 vec_rhs1
, vec_rhs2
);
8489 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8493 if (bitop1
== BIT_NOT_EXPR
)
8494 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8496 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8498 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8499 if (bitop2
!= NOP_EXPR
)
8501 tree res
= make_ssa_name (mask
);
8502 if (bitop2
== BIT_NOT_EXPR
)
8503 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8505 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8507 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8511 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8518 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8520 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8522 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8525 vec_oprnds0
.release ();
8526 vec_oprnds1
.release ();
8531 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8532 can handle all live statements in the node. Otherwise return true
8533 if STMT is not live or if vectorizable_live_operation can handle it.
8534 GSI and VEC_STMT are as for vectorizable_live_operation. */
8537 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8538 slp_tree slp_node
, gimple
**vec_stmt
)
8544 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8546 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8547 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8548 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8553 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
8554 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
))
8560 /* Make sure the statement is vectorizable. */
8563 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
8564 slp_instance node_instance
)
8566 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8567 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8568 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8570 gimple
*pattern_stmt
;
8571 gimple_seq pattern_def_seq
;
8573 if (dump_enabled_p ())
8575 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8576 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8579 if (gimple_has_volatile_ops (stmt
))
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8583 "not vectorized: stmt has volatile operands\n");
8588 /* Skip stmts that do not need to be vectorized. In loops this is expected
8590 - the COND_EXPR which is the loop exit condition
8591 - any LABEL_EXPRs in the loop
8592 - computations that are used only for array indexing or loop control.
8593 In basic blocks we only analyze statements that are a part of some SLP
8594 instance, therefore, all the statements are relevant.
8596 Pattern statement needs to be analyzed instead of the original statement
8597 if the original statement is not relevant. Otherwise, we analyze both
8598 statements. In basic blocks we are called from some SLP instance
8599 traversal, don't analyze pattern stmts instead, the pattern stmts
8600 already will be part of SLP instance. */
8602 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8603 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8604 && !STMT_VINFO_LIVE_P (stmt_info
))
8606 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8608 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8609 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8611 /* Analyze PATTERN_STMT instead of the original stmt. */
8612 stmt
= pattern_stmt
;
8613 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8614 if (dump_enabled_p ())
8616 dump_printf_loc (MSG_NOTE
, vect_location
,
8617 "==> examining pattern statement: ");
8618 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8623 if (dump_enabled_p ())
8624 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8629 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8632 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8633 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8635 /* Analyze PATTERN_STMT too. */
8636 if (dump_enabled_p ())
8638 dump_printf_loc (MSG_NOTE
, vect_location
,
8639 "==> examining pattern statement: ");
8640 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8643 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
8648 if (is_pattern_stmt_p (stmt_info
)
8650 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8652 gimple_stmt_iterator si
;
8654 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8656 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8657 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8658 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8660 /* Analyze def stmt of STMT if it's a pattern stmt. */
8661 if (dump_enabled_p ())
8663 dump_printf_loc (MSG_NOTE
, vect_location
,
8664 "==> examining pattern def statement: ");
8665 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8668 if (!vect_analyze_stmt (pattern_def_stmt
,
8669 need_to_vectorize
, node
, node_instance
))
8675 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8677 case vect_internal_def
:
8680 case vect_reduction_def
:
8681 case vect_nested_cycle
:
8682 gcc_assert (!bb_vinfo
8683 && (relevance
== vect_used_in_outer
8684 || relevance
== vect_used_in_outer_by_reduction
8685 || relevance
== vect_used_by_reduction
8686 || relevance
== vect_unused_in_scope
8687 || relevance
== vect_used_only_live
));
8690 case vect_induction_def
:
8691 gcc_assert (!bb_vinfo
);
8694 case vect_constant_def
:
8695 case vect_external_def
:
8696 case vect_unknown_def_type
:
8701 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8703 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8704 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8705 || (is_gimple_call (stmt
)
8706 && gimple_call_lhs (stmt
) == NULL_TREE
));
8707 *need_to_vectorize
= true;
8710 if (PURE_SLP_STMT (stmt_info
) && !node
)
8712 dump_printf_loc (MSG_NOTE
, vect_location
,
8713 "handled only by SLP analysis\n");
8719 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8720 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8721 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8722 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8723 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8724 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8725 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8726 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8727 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8728 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8729 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
)
8730 || vectorizable_induction (stmt
, NULL
, NULL
, node
)
8731 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8732 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8736 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8737 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8738 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8739 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8740 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8741 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8742 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8743 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8744 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8745 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8750 if (dump_enabled_p ())
8752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8753 "not vectorized: relevant stmt not ");
8754 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8755 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8764 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8765 need extra handling, except for vectorizable reductions. */
8766 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8767 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
))
8769 if (dump_enabled_p ())
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8772 "not vectorized: live stmt not supported: ");
8773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8783 /* Function vect_transform_stmt.
8785 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8788 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8789 bool *grouped_store
, slp_tree slp_node
,
8790 slp_instance slp_node_instance
)
8792 bool is_store
= false;
8793 gimple
*vec_stmt
= NULL
;
8794 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8797 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8798 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8800 switch (STMT_VINFO_TYPE (stmt_info
))
8802 case type_demotion_vec_info_type
:
8803 case type_promotion_vec_info_type
:
8804 case type_conversion_vec_info_type
:
8805 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8809 case induc_vec_info_type
:
8810 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
);
8814 case shift_vec_info_type
:
8815 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8819 case op_vec_info_type
:
8820 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8824 case assignment_vec_info_type
:
8825 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8829 case load_vec_info_type
:
8830 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8835 case store_vec_info_type
:
8836 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8840 /* In case of interleaving, the whole chain is vectorized when the
8841 last store in the chain is reached. Store stmts before the last
8842 one are skipped, and there vec_stmt_info shouldn't be freed
8844 *grouped_store
= true;
8845 if (STMT_VINFO_VEC_STMT (stmt_info
))
8852 case condition_vec_info_type
:
8853 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8857 case comparison_vec_info_type
:
8858 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8862 case call_vec_info_type
:
8863 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8864 stmt
= gsi_stmt (*gsi
);
8865 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
8869 case call_simd_clone_vec_info_type
:
8870 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8871 stmt
= gsi_stmt (*gsi
);
8874 case reduc_vec_info_type
:
8875 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
8881 if (!STMT_VINFO_LIVE_P (stmt_info
))
8883 if (dump_enabled_p ())
8884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8885 "stmt not supported.\n");
8890 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8891 This would break hybrid SLP vectorization. */
8893 gcc_assert (!vec_stmt
8894 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8896 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8897 is being vectorized, but outside the immediately enclosing loop. */
8899 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8900 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8901 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8902 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8903 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8904 || STMT_VINFO_RELEVANT (stmt_info
) ==
8905 vect_used_in_outer_by_reduction
))
8907 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8908 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8909 imm_use_iterator imm_iter
;
8910 use_operand_p use_p
;
8914 if (dump_enabled_p ())
8915 dump_printf_loc (MSG_NOTE
, vect_location
,
8916 "Record the vdef for outer-loop vectorization.\n");
8918 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8919 (to be used when vectorizing outer-loop stmts that use the DEF of
8921 if (gimple_code (stmt
) == GIMPLE_PHI
)
8922 scalar_dest
= PHI_RESULT (stmt
);
8924 scalar_dest
= gimple_assign_lhs (stmt
);
8926 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8928 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8930 exit_phi
= USE_STMT (use_p
);
8931 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8936 /* Handle stmts whose DEF is used outside the loop-nest that is
8937 being vectorized. */
8938 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8940 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
);
8945 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8951 /* Remove a group of stores (for SLP or interleaving), free their
8955 vect_remove_stores (gimple
*first_stmt
)
8957 gimple
*next
= first_stmt
;
8959 gimple_stmt_iterator next_si
;
8963 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8965 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8966 if (is_pattern_stmt_p (stmt_info
))
8967 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8968 /* Free the attached stmt_vec_info and remove the stmt. */
8969 next_si
= gsi_for_stmt (next
);
8970 unlink_stmt_vdef (next
);
8971 gsi_remove (&next_si
, true);
8972 release_defs (next
);
8973 free_stmt_vec_info (next
);
8979 /* Function new_stmt_vec_info.
8981 Create and initialize a new stmt_vec_info struct for STMT. */
8984 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8987 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8989 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8990 STMT_VINFO_STMT (res
) = stmt
;
8992 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8993 STMT_VINFO_LIVE_P (res
) = false;
8994 STMT_VINFO_VECTYPE (res
) = NULL
;
8995 STMT_VINFO_VEC_STMT (res
) = NULL
;
8996 STMT_VINFO_VECTORIZABLE (res
) = true;
8997 STMT_VINFO_IN_PATTERN_P (res
) = false;
8998 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8999 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
9000 STMT_VINFO_DATA_REF (res
) = NULL
;
9001 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
9002 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
9004 if (gimple_code (stmt
) == GIMPLE_PHI
9005 && is_loop_header_bb_p (gimple_bb (stmt
)))
9006 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
9008 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
9010 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
9011 STMT_SLP_TYPE (res
) = loop_vect
;
9012 STMT_VINFO_NUM_SLP_USES (res
) = 0;
9014 GROUP_FIRST_ELEMENT (res
) = NULL
;
9015 GROUP_NEXT_ELEMENT (res
) = NULL
;
9016 GROUP_SIZE (res
) = 0;
9017 GROUP_STORE_COUNT (res
) = 0;
9018 GROUP_GAP (res
) = 0;
9019 GROUP_SAME_DR_STMT (res
) = NULL
;
9025 /* Create a hash table for stmt_vec_info. */
9028 init_stmt_vec_info_vec (void)
9030 gcc_assert (!stmt_vec_info_vec
.exists ());
9031 stmt_vec_info_vec
.create (50);
9035 /* Free hash table for stmt_vec_info. */
9038 free_stmt_vec_info_vec (void)
9042 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
9044 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9045 gcc_assert (stmt_vec_info_vec
.exists ());
9046 stmt_vec_info_vec
.release ();
9050 /* Free stmt vectorization related info. */
9053 free_stmt_vec_info (gimple
*stmt
)
9055 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9060 /* Check if this statement has a related "pattern stmt"
9061 (introduced by the vectorizer during the pattern recognition
9062 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9064 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9066 stmt_vec_info patt_info
9067 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9070 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
9071 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9072 gimple_set_bb (patt_stmt
, NULL
);
9073 tree lhs
= gimple_get_lhs (patt_stmt
);
9074 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9075 release_ssa_name (lhs
);
9078 gimple_stmt_iterator si
;
9079 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
9081 gimple
*seq_stmt
= gsi_stmt (si
);
9082 gimple_set_bb (seq_stmt
, NULL
);
9083 lhs
= gimple_get_lhs (seq_stmt
);
9084 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9085 release_ssa_name (lhs
);
9086 free_stmt_vec_info (seq_stmt
);
9089 free_stmt_vec_info (patt_stmt
);
9093 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9094 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9095 set_vinfo_for_stmt (stmt
, NULL
);
9100 /* Function get_vectype_for_scalar_type_and_size.
9102 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9106 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
9108 tree orig_scalar_type
= scalar_type
;
9109 scalar_mode inner_mode
;
9110 machine_mode simd_mode
;
9114 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9115 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9118 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9120 /* For vector types of elements whose mode precision doesn't
9121 match their types precision we use a element type of mode
9122 precision. The vectorization routines will have to make sure
9123 they support the proper result truncation/extension.
9124 We also make sure to build vector types with INTEGER_TYPE
9125 component type only. */
9126 if (INTEGRAL_TYPE_P (scalar_type
)
9127 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9128 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9129 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9130 TYPE_UNSIGNED (scalar_type
));
9132 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9133 When the component mode passes the above test simply use a type
9134 corresponding to that mode. The theory is that any use that
9135 would cause problems with this will disable vectorization anyway. */
9136 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9137 && !INTEGRAL_TYPE_P (scalar_type
))
9138 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9140 /* We can't build a vector type of elements with alignment bigger than
9142 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9143 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9144 TYPE_UNSIGNED (scalar_type
));
9146 /* If we felt back to using the mode fail if there was
9147 no scalar type for it. */
9148 if (scalar_type
== NULL_TREE
)
9151 /* If no size was supplied use the mode the target prefers. Otherwise
9152 lookup a vector mode of the specified size. */
9154 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9155 else if (!mode_for_vector (inner_mode
, size
/ nbytes
).exists (&simd_mode
))
9157 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
9158 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9162 vectype
= build_vector_type (scalar_type
, nunits
);
9164 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9165 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9168 /* Re-attach the address-space qualifier if we canonicalized the scalar
9170 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9171 return build_qualified_type
9172 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9177 unsigned int current_vector_size
;
9179 /* Function get_vectype_for_scalar_type.
9181 Returns the vector type corresponding to SCALAR_TYPE as supported
9185 get_vectype_for_scalar_type (tree scalar_type
)
9188 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9189 current_vector_size
);
9191 && current_vector_size
== 0)
9192 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9196 /* Function get_mask_type_for_scalar_type.
9198 Returns the mask type corresponding to a result of comparison
9199 of vectors of specified SCALAR_TYPE as supported by target. */
9202 get_mask_type_for_scalar_type (tree scalar_type
)
9204 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9209 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9210 current_vector_size
);
9213 /* Function get_same_sized_vectype
9215 Returns a vector type corresponding to SCALAR_TYPE of size
9216 VECTOR_TYPE if supported by the target. */
9219 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9221 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9222 return build_same_sized_truth_vector_type (vector_type
);
9224 return get_vectype_for_scalar_type_and_size
9225 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9228 /* Function vect_is_simple_use.
9231 VINFO - the vect info of the loop or basic block that is being vectorized.
9232 OPERAND - operand in the loop or bb.
9234 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9235 DT - the type of definition
9237 Returns whether a stmt with OPERAND can be vectorized.
9238 For loops, supportable operands are constants, loop invariants, and operands
9239 that are defined by the current iteration of the loop. Unsupportable
9240 operands are those that are defined by a previous iteration of the loop (as
9241 is the case in reduction/induction computations).
9242 For basic blocks, supportable operands are constants and bb invariants.
9243 For now, operands defined outside the basic block are not supported. */
9246 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9247 gimple
**def_stmt
, enum vect_def_type
*dt
)
9250 *dt
= vect_unknown_def_type
;
9252 if (dump_enabled_p ())
9254 dump_printf_loc (MSG_NOTE
, vect_location
,
9255 "vect_is_simple_use: operand ");
9256 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9257 dump_printf (MSG_NOTE
, "\n");
9260 if (CONSTANT_CLASS_P (operand
))
9262 *dt
= vect_constant_def
;
9266 if (is_gimple_min_invariant (operand
))
9268 *dt
= vect_external_def
;
9272 if (TREE_CODE (operand
) != SSA_NAME
)
9274 if (dump_enabled_p ())
9275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9280 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9282 *dt
= vect_external_def
;
9286 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9287 if (dump_enabled_p ())
9289 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9290 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
9293 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9294 *dt
= vect_external_def
;
9297 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9298 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9301 if (dump_enabled_p ())
9303 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9306 case vect_uninitialized_def
:
9307 dump_printf (MSG_NOTE
, "uninitialized\n");
9309 case vect_constant_def
:
9310 dump_printf (MSG_NOTE
, "constant\n");
9312 case vect_external_def
:
9313 dump_printf (MSG_NOTE
, "external\n");
9315 case vect_internal_def
:
9316 dump_printf (MSG_NOTE
, "internal\n");
9318 case vect_induction_def
:
9319 dump_printf (MSG_NOTE
, "induction\n");
9321 case vect_reduction_def
:
9322 dump_printf (MSG_NOTE
, "reduction\n");
9324 case vect_double_reduction_def
:
9325 dump_printf (MSG_NOTE
, "double reduction\n");
9327 case vect_nested_cycle
:
9328 dump_printf (MSG_NOTE
, "nested cycle\n");
9330 case vect_unknown_def_type
:
9331 dump_printf (MSG_NOTE
, "unknown\n");
9336 if (*dt
== vect_unknown_def_type
)
9338 if (dump_enabled_p ())
9339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9340 "Unsupported pattern.\n");
9344 switch (gimple_code (*def_stmt
))
9351 if (dump_enabled_p ())
9352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9353 "unsupported defining stmt:\n");
9360 /* Function vect_is_simple_use.
9362 Same as vect_is_simple_use but also determines the vector operand
9363 type of OPERAND and stores it to *VECTYPE. If the definition of
9364 OPERAND is vect_uninitialized_def, vect_constant_def or
9365 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9366 is responsible to compute the best suited vector type for the
9370 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9371 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
9373 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
9376 /* Now get a vector type if the def is internal, otherwise supply
9377 NULL_TREE and leave it up to the caller to figure out a proper
9378 type for the use stmt. */
9379 if (*dt
== vect_internal_def
9380 || *dt
== vect_induction_def
9381 || *dt
== vect_reduction_def
9382 || *dt
== vect_double_reduction_def
9383 || *dt
== vect_nested_cycle
)
9385 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
9387 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9388 && !STMT_VINFO_RELEVANT (stmt_info
)
9389 && !STMT_VINFO_LIVE_P (stmt_info
))
9390 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9392 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9393 gcc_assert (*vectype
!= NULL_TREE
);
9395 else if (*dt
== vect_uninitialized_def
9396 || *dt
== vect_constant_def
9397 || *dt
== vect_external_def
)
9398 *vectype
= NULL_TREE
;
/* NOTE(review): this region was mechanically re-extracted; statements are
   split across lines, original file line numbers are fused into the text,
   and some original lines (braces, break/return statements) are missing.
   The token content below is preserved verbatim; confirm against the
   original tree-vect-stmts.c before relying on it.  */
9406 /* Function supportable_widening_operation
9408 Check whether an operation represented by the code CODE is a
9409 widening operation that is supported by the target platform in
9410 vector form (i.e., when operating on arguments of type VECTYPE_IN
9411 producing a result of type VECTYPE_OUT).
9413 Widening operations we currently support are NOP (CONVERT), FLOAT
9414 and WIDEN_MULT. This function checks if these operations are supported
9415 by the target platform either directly (via vector tree-codes), or via
9419 - CODE1 and CODE2 are codes of vector operations to be used when
9420 vectorizing the operation, if available.
9421 - MULTI_STEP_CVT determines the number of required intermediate steps in
9422 case of multi-step conversion (like char->short->int - in that case
9423 MULTI_STEP_CVT will be 1).
9424 - INTERM_TYPES contains the intermediate type required to perform the
9425 widening operation (short in the above example). */
9428 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9429 tree vectype_out
, tree vectype_in
,
9430 enum tree_code
*code1
, enum tree_code
*code2
,
9431 int *multi_step_cvt
,
9432 vec
<tree
> *interm_types
)
9434 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9435 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9436 struct loop
*vect_loop
= NULL
;
9437 machine_mode vec_mode
;
9438 enum insn_code icode1
, icode2
;
9439 optab optab1
, optab2
;
9440 tree vectype
= vectype_in
;
9441 tree wide_vectype
= vectype_out
;
9442 enum tree_code c1
, c2
;
9444 tree prev_type
, intermediate_type
;
9445 machine_mode intermediate_mode
, prev_mode
;
9446 optab optab3
, optab4
;
/* Assume a direct (single-step) conversion until an intermediate step is
   recorded below, where (*multi_step_cvt) is incremented per step.  */
9448 *multi_step_cvt
= 0;
9450 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9454 case WIDEN_MULT_EXPR
:
9455 /* The result of a vectorized widening operation usually requires
9456 two vectors (because the widened results do not fit into one vector).
9457 The generated vector results would normally be expected to be
9458 generated in the same order as in the original scalar computation,
9459 i.e. if 8 results are generated in each vector iteration, they are
9460 to be organized as follows:
9461 vect1: [res1,res2,res3,res4],
9462 vect2: [res5,res6,res7,res8].
9464 However, in the special case that the result of the widening
9465 operation is used in a reduction computation only, the order doesn't
9466 matter (because when vectorizing a reduction we change the order of
9467 the computation). Some targets can take advantage of this and
9468 generate more efficient code. For example, targets like Altivec,
9469 that support widen_mult using a sequence of {mult_even,mult_odd}
9470 generate the following vectors:
9471 vect1: [res1,res3,res5,res7],
9472 vect2: [res2,res4,res6,res8].
9474 When vectorizing outer-loops, we execute the inner-loop sequentially
9475 (each vectorized inner-loop iteration contributes to VF outer-loop
9476 iterations in parallel). We therefore don't allow to change the
9477 order of the computation in the inner-loop during outer-loop
9479 /* TODO: Another case in which order doesn't *really* matter is when we
9480 widen and then contract again, e.g. (short)((int)x * y >> 8).
9481 Normally, pack_trunc performs an even/odd permute, whereas the
9482 repack from an even/odd expansion would be an interleave, which
9483 would be significantly simpler for e.g. AVX2. */
9484 /* In any case, in order to avoid duplicating the code below, recurse
9485 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9486 are properly set up for the caller. If we fail, we'll continue with
9487 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
/* NOTE(review): the opening "if (vect_loop" of the condition below was
   lost in extraction -- confirm against the original file.  */
9489 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9490 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9491 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9492 stmt
, vectype_out
, vectype_in
,
9493 code1
, code2
, multi_step_cvt
,
9496 /* Elements in a vector with vect_used_by_reduction property cannot
9497 be reordered if the use chain with this property does not have the
9498 same operation. One such an example is s += a * b, where elements
9499 in a and b cannot be reordered. Here we check if the vector defined
9500 by STMT is only directly used in the reduction statement. */
9501 tree lhs
= gimple_assign_lhs (stmt
);
9502 use_operand_p dummy
;
9504 stmt_vec_info use_stmt_info
= NULL
;
9505 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9506 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9507 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9510 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9511 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9524 case VEC_WIDEN_MULT_EVEN_EXPR
:
9525 /* Support the recursion induced just above. */
9526 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9527 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9530 case WIDEN_LSHIFT_EXPR
:
9531 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9532 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9536 c1
= VEC_UNPACK_LO_EXPR
;
9537 c2
= VEC_UNPACK_HI_EXPR
;
9541 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9542 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9545 case FIX_TRUNC_EXPR
:
9546 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9547 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9548 computing the operation. */
/* On big-endian targets the LO/HI pair is swapped (even/odd variants are
   endian-neutral); the swap statement itself was lost in extraction.  */
9555 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9558 if (code
== FIX_TRUNC_EXPR
)
9560 /* The signedness is determined from output operand. */
9561 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9562 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9566 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9567 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9570 if (!optab1
|| !optab2
)
9573 vec_mode
= TYPE_MODE (vectype
);
9574 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9575 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Direct single-step support: both insns must produce the wide result
   mode.  */
9581 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9582 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9583 /* For scalar masks we may have different boolean
9584 vector types having the same QImode. Thus we
9585 add additional check for elements number. */
9586 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9587 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9588 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9590 /* Check if it's a multi-step conversion that can be done using intermediate
9593 prev_type
= vectype
;
9594 prev_mode
= vec_mode
;
9596 if (!CONVERT_EXPR_CODE_P (code
))
9599 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9600 intermediate steps in promotion sequence. We try
9601 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9603 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9604 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9606 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9607 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9610 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9611 current_vector_size
);
9612 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9617 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9618 TYPE_UNSIGNED (prev_type
));
9620 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9621 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9623 if (!optab3
|| !optab4
9624 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9625 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9626 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9627 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9628 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9629 == CODE_FOR_nothing
)
9630 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9631 == CODE_FOR_nothing
))
9634 interm_types
->quick_push (intermediate_type
);
9635 (*multi_step_cvt
)++;
9637 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9638 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9639 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9640 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9641 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9643 prev_type
= intermediate_type
;
9644 prev_mode
= intermediate_mode
;
/* No usable multi-step sequence within MAX_INTERM_CVT_STEPS; drop any
   intermediate types collected so far.  */
9647 interm_types
->release ();
9652 /* Function supportable_narrowing_operation
9654 Check whether an operation represented by the code CODE is a
9655 narrowing operation that is supported by the target platform in
9656 vector form (i.e., when operating on arguments of type VECTYPE_IN
9657 and producing a result of type VECTYPE_OUT).
9659 Narrowing operations we currently support are NOP (CONVERT) and
9660 FIX_TRUNC. This function checks if these operations are supported by
9661 the target platform directly via vector tree-codes.
9664 - CODE1 is the code of a vector operation to be used when
9665 vectorizing the operation, if available.
9666 - MULTI_STEP_CVT determines the number of required intermediate steps in
9667 case of multi-step conversion (like int->short->char - in that case
9668 MULTI_STEP_CVT will be 1).
9669 - INTERM_TYPES contains the intermediate type required to perform the
9670 narrowing operation (short in the above example). */
9673 supportable_narrowing_operation (enum tree_code code
,
9674 tree vectype_out
, tree vectype_in
,
9675 enum tree_code
*code1
, int *multi_step_cvt
,
9676 vec
<tree
> *interm_types
)
9678 machine_mode vec_mode
;
9679 enum insn_code icode1
;
9680 optab optab1
, interm_optab
;
9681 tree vectype
= vectype_in
;
9682 tree narrow_vectype
= vectype_out
;
9684 tree intermediate_type
, prev_type
;
9685 machine_mode intermediate_mode
, prev_mode
;
9689 *multi_step_cvt
= 0;
9693 c1
= VEC_PACK_TRUNC_EXPR
;
9696 case FIX_TRUNC_EXPR
:
9697 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9701 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9702 tree code and optabs used for computing the operation. */
9709 if (code
== FIX_TRUNC_EXPR
)
9710 /* The signedness is determined from output operand. */
9711 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9713 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9718 vec_mode
= TYPE_MODE (vectype
);
9719 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9724 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9725 /* For scalar masks we may have different boolean
9726 vector types having the same QImode. Thus we
9727 add additional check for elements number. */
9728 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9729 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9730 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9732 /* Check if it's a multi-step conversion that can be done using intermediate
9734 prev_mode
= vec_mode
;
9735 prev_type
= vectype
;
9736 if (code
== FIX_TRUNC_EXPR
)
9737 uns
= TYPE_UNSIGNED (vectype_out
);
9739 uns
= TYPE_UNSIGNED (vectype
);
9741 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9742 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9743 costly than signed. */
9744 if (code
== FIX_TRUNC_EXPR
&& uns
)
9746 enum insn_code icode2
;
9749 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9751 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9752 if (interm_optab
!= unknown_optab
9753 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9754 && insn_data
[icode1
].operand
[0].mode
9755 == insn_data
[icode2
].operand
[0].mode
)
9758 optab1
= interm_optab
;
9763 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9764 intermediate steps in promotion sequence. We try
9765 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9766 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9767 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9769 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9770 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9773 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9774 current_vector_size
);
9775 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9780 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9782 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9785 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9786 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9787 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9788 == CODE_FOR_nothing
))
9791 interm_types
->quick_push (intermediate_type
);
9792 (*multi_step_cvt
)++;
9794 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9795 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9796 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9797 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9799 prev_mode
= intermediate_mode
;
9800 prev_type
= intermediate_type
;
9801 optab1
= interm_optab
;
9804 interm_types
->release ();