/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
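/* Illustrative note (not part of the original comment): when the outer loop
   of a two-level nest is the one being vectorized ("outer-loop
   vectorization"), the function above answers true for the statements of the
   inner loop, which the cost model and code generation treat specially.  */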
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
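/* Illustrative sketch of a hypothetical caller (names are assumptions, not
   taken from the original): while still analyzing a statement, a caller
   typically passes a cost vector,

       unsigned est = record_stmt_cost (&body_cost_vec, 1, vector_stmt,
                                        stmt_info, 0, vect_body);

   which only *records* the cost for later processing and returns a rough
   estimate; passing a NULL vector instead hands the cost directly to the
   target via add_stmt_cost.  */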
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
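/* For example (illustrative scalar code, not from the original comment): in

       for (i = 0; i < n; i++)
         b[i] = a[i] + x;

   the statement storing into b[i] alters memory, so it gets *RELEVANT set;
   a statement whose result is also read after the loop (say, the running
   value of a scalar accumulated inside the loop) gets *LIVE_P set instead.  */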
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
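/* For example (illustrative): in the scalar statement a[i] = x, the use of
   'i' only feeds the address computation of the data reference, so the
   function above returns false for 'i', while the use of 'x' is a genuine
   rhs operand and yields true.  The MASK_LOAD/MASK_STORE internal calls
   handled in the switch are the exception: their mask and stored-value
   arguments are real uses even though the call carries a data reference.  */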
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
     return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
  	          /* fall through */

	        default:
	          if (dump_enabled_p ())
	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
                {
		  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
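/* Worked example (illustrative numbers, not from the original comment): for
   an interleaved store group with group_size = 4, ncopies = 2 and no
   store-lanes support, the permute count charged above is
   nstmts = 2 * ceil_log2 (4) * 4 = 16 vec_perm statements, in addition to
   the vector stores themselves costed by vect_get_store_cost.  */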
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
          }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

	  pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
	}
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
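/* For example (illustrative): called with VAL = 5 and TYPE = a 4-element
   integer vector type, the function above materializes
   'cst_ = {5, 5, 5, 5}' (via build_vector_from_val), inserts the assignment
   at GSI or in the loop preheader, and returns the SSA name holding that
   vector; with a scalar TYPE it simply copies VAL into a new SSA name.  */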
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt =  ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple_call call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);
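/* For example (illustrative): for a call to the sqrt builtin with
   VECTYPE_OUT == VECTYPE_IN == a double vector type, a target hook such as
   x86's builtin_vectorized_function may return the declaration of a vector
   square-root builtin; targets without such a variant return NULL_TREE and
   the call cannot be vectorized this way.  */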
1733 /* Function vectorizable_mask_load_store.
1735 Check if STMT performs a conditional load or store that can be vectorized.
1736 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1737 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1738 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1741 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1742 gimple
*vec_stmt
, slp_tree slp_node
)
1744 tree vec_dest
= NULL
;
1745 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1746 stmt_vec_info prev_stmt_info
;
1747 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1748 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1749 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1750 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1751 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1755 tree dataref_ptr
= NULL_TREE
;
1757 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1761 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1762 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1763 int gather_scale
= 1;
1764 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1769 enum vect_def_type dt
;
1771 if (slp_node
!= NULL
)
1774 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1775 gcc_assert (ncopies
>= 1);
1777 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1778 mask
= gimple_call_arg (stmt
, 2);
1779 if (TYPE_PRECISION (TREE_TYPE (mask
))
1780 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1783 /* FORNOW. This restriction should be relaxed. */
1784 if (nested_in_vect_loop
&& ncopies
> 1)
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1788 "multiple types in nested loop.");
1792 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1795 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1798 if (!STMT_VINFO_DATA_REF (stmt_info
))
1801 elem_type
= TREE_TYPE (vectype
);
1803 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1806 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1809 if (STMT_VINFO_GATHER_P (stmt_info
))
1813 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1814 &gather_off
, &gather_scale
);
1815 gcc_assert (gather_decl
);
1816 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1817 &def_stmt
, &def
, &gather_dt
,
1818 &gather_off_vectype
))
1820 if (dump_enabled_p ())
1821 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1822 "gather index use not simple.");
1826 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1828 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1829 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1833 "masked gather with integer mask not supported.");
1837 else if (tree_int_cst_compare (nested_in_vect_loop
1838 ? STMT_VINFO_DR_STEP (stmt_info
)
1839 : DR_STEP (dr
), size_zero_node
) <= 0)
1841 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1842 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1845 if (TREE_CODE (mask
) != SSA_NAME
)
1848 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1849 &def_stmt
, &def
, &dt
))
1854 tree rhs
= gimple_call_arg (stmt
, 3);
1855 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1856 &def_stmt
, &def
, &dt
))
1860 if (!vec_stmt
) /* transformation not required. */
1862 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1864 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1867 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1873 if (STMT_VINFO_GATHER_P (stmt_info
))
1875 tree vec_oprnd0
= NULL_TREE
, op
;
1876 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1877 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1878 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1879 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1880 tree mask_perm_mask
= NULL_TREE
;
1881 edge pe
= loop_preheader_edge (loop
);
1884 enum { NARROW
, NONE
, WIDEN
} modifier
;
1885 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1887 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1888 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1889 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1890 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1891 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1892 scaletype
= TREE_VALUE (arglist
);
1893 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1894 && types_compatible_p (srctype
, masktype
));
1896 if (nunits
== gather_off_nunits
)
1898 else if (nunits
== gather_off_nunits
/ 2)
1900 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1903 for (i
= 0; i
< gather_off_nunits
; ++i
)
1904 sel
[i
] = i
| nunits
;
1906 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
1907 gcc_assert (perm_mask
!= NULL_TREE
);
1909 else if (nunits
== gather_off_nunits
* 2)
1911 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1914 for (i
= 0; i
< nunits
; ++i
)
1915 sel
[i
] = i
< gather_off_nunits
1916 ? i
: i
+ nunits
- gather_off_nunits
;
1918 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
1919 gcc_assert (perm_mask
!= NULL_TREE
);
1921 for (i
= 0; i
< nunits
; ++i
)
1922 sel
[i
] = i
| gather_off_nunits
;
1923 mask_perm_mask
= vect_gen_perm_mask (masktype
, sel
);
1924 gcc_assert (mask_perm_mask
!= NULL_TREE
);
1929 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1931 ptr
= fold_convert (ptrtype
, gather_base
);
1932 if (!is_gimple_min_invariant (ptr
))
1934 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1935 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1936 gcc_assert (!new_bb
);
1939 scale
= build_int_cst (scaletype
, gather_scale
);
1941 prev_stmt_info
= NULL
;
  for (j = 0; j < ncopies; ++j)
    {
      if (modifier == WIDEN && (j & 1))
        op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                   perm_mask, stmt, gsi);
      else if (j == 0)
        op = vec_oprnd0
          = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
      else
        op = vec_oprnd0
          = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
        {
          gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                      == TYPE_VECTOR_SUBPARTS (idxtype));
          var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
          var = make_ssa_name (var, NULL);
          op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
          new_stmt
            = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                            op, NULL_TREE);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          op = var;
        }

      if (mask_perm_mask && (j & 1))
        mask_op = permute_vec_elements (mask_op, mask_op,
                                        mask_perm_mask, stmt, gsi);
      else
        {
          if (j == 0)
            vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
          else
            {
              vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
                                  &def_stmt, &def, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
            }

          mask_op = vec_mask;
          if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                          == TYPE_VECTOR_SUBPARTS (masktype));
              var = vect_get_new_vect_var (masktype, vect_simple_var, NULL);
              var = make_ssa_name (var, NULL);
              mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                mask_op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              mask_op = var;
            }
        }

      new_stmt
        = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                             scale);

      if (!useless_type_conversion_p (vectype, rettype))
        {
          gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                      == TYPE_VECTOR_SUBPARTS (rettype));
          var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
          op = make_ssa_name (var, new_stmt);
          gimple_call_set_lhs (new_stmt, op);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          var = make_ssa_name (vec_dest, NULL);
          op = build1 (VIEW_CONVERT_EXPR, vectype, op);
          new_stmt
            = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                            NULL_TREE);
        }
      else
        {
          var = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, var);
        }

      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (modifier == NARROW)
        {
          var = permute_vec_elements (prev_res, var,
                                      perm_mask, stmt, gsi);
          new_stmt = SSA_NAME_DEF_STMT (var);
        }

      if (prev_stmt_info == NULL)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
  /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
     from the IL.  */
  tree lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
  prev_stmt_info = NULL;
  for (i = 0; i < ncopies; i++)
    {
      unsigned align, misalign;

      if (i == 0)
        {
          tree rhs = gimple_call_arg (stmt, 3);
          vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
          vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_rhs)));
          dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (!inv_p);
        }
      else
        {
          vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
                              &def, &dt);
          vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
          vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
                              &def, &dt);
          vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (vectype));
        }

      align = TYPE_ALIGN_UNIT (vectype);
      if (aligned_access_p (dr))
        misalign = 0;
      else if (DR_MISALIGNMENT (dr) == -1)
        {
          align = TYPE_ALIGN_UNIT (elem_type);
          misalign = 0;
        }
      else
        misalign = DR_MISALIGNMENT (dr);
      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                              misalign);
      new_stmt
        = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                      gimple_call_arg (stmt, 1),
                                      vec_mask, vec_rhs);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
      if (i == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
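  /* For example, for a 16-byte vector of floats an aligned access records
     align == 16 with misalign == 0; when the misalignment is unknown
     (DR_MISALIGNMENT == -1) only the element alignment (4 bytes for float)
     is recorded for the data reference pointer.  */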
  tree vec_mask = NULL_TREE;
  prev_stmt_info = NULL;
  vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      unsigned align, misalign;

      if (i == 0)
        {
          vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
          dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (!inv_p);
        }
      else
        {
          vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
                              &def, &dt);
          vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (vectype));
        }

      align = TYPE_ALIGN_UNIT (vectype);
      if (aligned_access_p (dr))
        misalign = 0;
      else if (DR_MISALIGNMENT (dr) == -1)
        {
          align = TYPE_ALIGN_UNIT (elem_type);
          misalign = 0;
        }
      else
        misalign = DR_MISALIGNMENT (dr);
      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                              misalign);
      new_stmt
        = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                      gimple_call_arg (stmt, 1),
                                      vec_mask);
      gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
      if (i == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
  /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
     from the IL.  */
  tree lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gimple_call> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    rhs_type = unsigned_type_node;

  for (i = 0; i < nargs; i++)
    {
      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
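  /* For example, with a vectorization factor of 8 and 4 elements per input
     vector (modifier == NONE) two copies of the vectorized call are emitted;
     for a NARROW call the count is derived from the output vector width
     instead.  */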
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                         "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest, NULL);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
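              /* For example, with nunits_out == 4 the constant vector built
                 here is { 0, 1, 2, 3 } for the first copy (j == 0) and
                 { 4, 5, 6, 7 } for the second, giving the
                 { 0, 1, 2, ... vf - 1 } lane numbers mentioned above.  */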
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
};

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp, def;
  gimple new_stmt = NULL;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &thisarginfo.dt,
                                 &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      if (thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && TREE_CODE (op) == SSA_NAME
          && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
          && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;

      arginfo.quick_push (thisarginfo);
    }
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
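            /* Illustration of the scoring above: with a vectorization factor
               of 8, a clone with simdlen 4 collects
               (log2 (8) - log2 (4)) * 1024 == 1024 badness points and an
               inbranch clone another 2048, so an exact-length, not-inbranch
               clone is preferred when one is available.  */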
          }
      }

  if (bestn == NULL)
    return false;

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          {
            arginfo.release ();
            return false;
          }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
      /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  rtype = TREE_TYPE (TREE_TYPE (fndecl));
  if (TREE_CODE (rtype) == ARRAY_TYPE)
    {
      ratype = rtype;
      rtype = TREE_TYPE (ratype);
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      unsigned int prec
                        = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt, NULL);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  size_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype, NULL),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      vec_alloc (ctor_elts, k);
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt, NULL);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype, NULL),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  tree phi_res = copy_ssa_name (op, NULL);
                  gimple new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  wide_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op, NULL);
                  new_stmt = gimple_build_assign_with_ops (code, phi_arg,
                                                           phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  wide_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op), NULL);
                  new_stmt
                    = gimple_build_assign_with_ops (code, new_temp,
                                                    arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype, NULL);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                size_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype, NULL), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype, NULL),
                                               tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest, NULL),
                                       vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber),
                                           gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  unlink_stmt_vdef (stmt);

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
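
/* For example, for a widening conversion the two codes passed in by the
   caller are typically a LO/HI pair such as VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR; this helper is then invoked once per half, and the
   two resulting vectors together cover all elements of the input vector.  */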
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
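
/* For instance, a two-step demotion of four source vectors first packs them
   pairwise into two intermediate vectors; the recursive call (with
   MULTI_STEP_CVT - 1) then packs those two into the single destination
   vector, halving the number of operands at each level as noted above.  */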
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
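
/* As an illustration, promoting two V8HI operand vectors produces four V4SI
   result vectors here (a LO and a HI half per input), which is why VEC_TMP
   is created with twice the length of VEC_OPRNDS0.  The concrete modes are
   only an example; any supported widening pair behaves the same way.  */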
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
3575 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3577 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3584 "conversion not supported by target.\n");
3588 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3589 &code1
, &code2
, &multi_step_cvt
,
3592 /* Binary widening operation can only be supported directly by the
3594 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3598 if (code
!= FLOAT_EXPR
3599 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3600 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3603 rhs_mode
= TYPE_MODE (rhs_type
);
3604 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3605 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3606 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3607 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3610 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3611 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3612 if (cvt_type
== NULL_TREE
)
3615 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3617 if (!supportable_convert_operation (code
, vectype_out
,
3618 cvt_type
, &decl1
, &codecvt1
))
3621 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3622 cvt_type
, &codecvt1
,
3623 &codecvt2
, &multi_step_cvt
,
3627 gcc_assert (multi_step_cvt
== 0);
3629 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3630 vectype_in
, &code1
, &code2
,
3631 &multi_step_cvt
, &interm_types
))
3635 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3638 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3639 codecvt2
= ERROR_MARK
;
3643 interm_types
.safe_push (cvt_type
);
3644 cvt_type
= NULL_TREE
;
3649 gcc_assert (op_type
== unary_op
);
3650 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3651 &code1
, &multi_step_cvt
,
3655 if (code
!= FIX_TRUNC_EXPR
3656 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3657 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3660 rhs_mode
= TYPE_MODE (rhs_type
);
3662 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3663 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3664 if (cvt_type
== NULL_TREE
)
3666 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3669 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3670 &code1
, &multi_step_cvt
,
3679 if (!vec_stmt
) /* transformation not required. */
3681 if (dump_enabled_p ())
3682 dump_printf_loc (MSG_NOTE
, vect_location
,
3683 "=== vectorizable_conversion ===\n");
3684 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3686 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3687 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3689 else if (modifier
== NARROW
)
3691 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3692 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3696 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3697 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3699 interm_types
.release ();
3704 if (dump_enabled_p ())
3705 dump_printf_loc (MSG_NOTE
, vect_location
,
3706 "transform conversion. ncopies = %d.\n", ncopies
);
3708 if (op_type
== binary_op
)
3710 if (CONSTANT_CLASS_P (op0
))
3711 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3712 else if (CONSTANT_CLASS_P (op1
))
3713 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3716 /* In case of multi-step conversion, we first generate conversion operations
3717 to the intermediate types, and then from that types to the final one.
3718 We create vector destinations for the intermediate type (TYPES) received
3719 from supportable_*_operation, and store them in the correct order
3720 for future use in vect_create_vectorized_*_stmts (). */
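  /* For example, with one intermediate type (MULTI_STEP_CVT == 1) VEC_DSTS
     ends up holding two destinations: the one for the final type is pushed
     first and the intermediate-type one second, so the first conversion
     step pops the intermediate destination and the last step pops the
     final one.  */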
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL_TREE);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp, vop0,
                                                               NULL_TREE);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL_TREE);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vec<tree> vec_oprnds = vNULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
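
/* Note for the shift handling below: vect_supportable_shift above accepts
   CODE if either the vector-shifted-by-scalar optab (optab_scalar) or the
   vector-shifted-by-vector optab (optab_vector) has a handler for the
   vector mode; vectorizable_shift makes the same distinction through
   SCALAR_SHIFT_ARG when deciding how the shift amount is vectorized.  */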
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4186 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4187 gimple
*vec_stmt
, slp_tree slp_node
)
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  enum machine_mode optab_op2_mode;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;
4240 scalar_dest
= gimple_assign_lhs (stmt
);
4241 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4242 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4243 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4247 "bit-precision shifts not supported.\n");
4251 op0
= gimple_assign_rhs1 (stmt
);
4252 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4253 &def_stmt
, &def
, &dt
[0], &vectype
))
4255 if (dump_enabled_p ())
4256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4257 "use not simple.\n");
4260 /* If op0 is an external or constant def use a vector type with
4261 the same size as the output vector type. */
4263 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4265 gcc_assert (vectype
);
4268 if (dump_enabled_p ())
4269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4270 "no vectype for scalar type\n");
4274 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4275 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4276 if (nunits_out
!= nunits_in
)
4279 op1
= gimple_assign_rhs2 (stmt
);
4280 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4281 &def
, &dt
[1], &op1_vectype
))
4283 if (dump_enabled_p ())
4284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4285 "use not simple.\n");
4290 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4294 /* Multiple types in SLP are handled by creating the appropriate number of
4295 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4297 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4300 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4302 gcc_assert (ncopies
>= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");
	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }
4409 /* Supportable by target? */
4412 if (dump_enabled_p ())
4413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4417 vec_mode
= TYPE_MODE (vectype
);
4418 icode
= (int) optab_handler (optab
, vec_mode
);
4419 if (icode
== CODE_FOR_nothing
)
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4423 "op not supported by target.\n");
4424 /* Check only during analysis. */
4425 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4426 || (vf
< vect_min_worthwhile_factor (code
)
4429 if (dump_enabled_p ())
4430 dump_printf_loc (MSG_NOTE
, vect_location
,
4431 "proceeding using word mode.\n");
4434 /* Worthwhile without SIMD support? Check only during analysis. */
4435 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4436 && vf
< vect_min_worthwhile_factor (code
)
4439 if (dump_enabled_p ())
4440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4441 "not worthwhile without SIMD support.\n");
4445 if (!vec_stmt
) /* transformation not required. */
4447 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4448 if (dump_enabled_p ())
4449 dump_printf_loc (MSG_NOTE
, vect_location
,
4450 "=== vectorizable_shift ===\n");
4451 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4457 if (dump_enabled_p ())
4458 dump_printf_loc (MSG_NOTE
, vect_location
,
4459 "transform binary/unary operation.\n");
4462 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4464 prev_stmt_info
= NULL
;
4465 for (j
= 0; j
< ncopies
; j
++)
4470 if (scalar_shift_arg
)
4472 /* Vector shl and shr insn patterns can be defined with scalar
4473 operand 2 (shift operand). In this case, use constant or loop
4474 invariant op1 directly, without extending it to vector mode
4476 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4477 if (!VECTOR_MODE_P (optab_op2_mode
))
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_NOTE
, vect_location
,
4481 "operand 1 using scalar mode.\n");
4483 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4484 vec_oprnds1
.quick_push (vec_oprnd1
);
4487 /* Store vec_oprnd1 for every vector stmt to be created
4488 for SLP_NODE. We check during the analysis that all
4489 the shift arguments are the same.
4490 TODO: Allow different constants for different vector
4491 stmts generated for an SLP instance. */
4492 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4493 vec_oprnds1
.quick_push (vec_oprnd1
);
4498 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4499 (a special case for certain kind of vector shifts); otherwise,
4500 operand 1 should be of a vector type (the usual case). */
4502 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4505 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4509 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4511 /* Arguments are ready. Create the new vector stmt. */
4512 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4514 vop1
= vec_oprnds1
[i
];
4515 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4516 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4517 gimple_assign_set_lhs (new_stmt
, new_temp
);
4518 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4520 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4527 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4529 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4530 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4533 vec_oprnds0
.release ();
4534 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
4554 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4555 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4557 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4558 enum tree_code code
;
4559 enum machine_mode vec_mode
;
4566 enum vect_def_type dt
[3]
4567 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4568 gimple new_stmt
= NULL
;
4569 stmt_vec_info prev_stmt_info
;
4575 vec
<tree
> vec_oprnds0
= vNULL
;
4576 vec
<tree
> vec_oprnds1
= vNULL
;
4577 vec
<tree
> vec_oprnds2
= vNULL
;
4578 tree vop0
, vop1
, vop2
;
4579 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4582 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4585 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4588 /* Is STMT a vectorizable binary/unary operation? */
4589 if (!is_gimple_assign (stmt
))
4592 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4595 code
= gimple_assign_rhs_code (stmt
);
4597 /* For pointer addition, we should use the normal plus for
4598 the vector addition. */
4599 if (code
== POINTER_PLUS_EXPR
)
4602 /* Support only unary or binary operations. */
4603 op_type
= TREE_CODE_LENGTH (code
);
4604 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4606 if (dump_enabled_p ())
4607 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4608 "num. args = %d (not unary/binary/ternary op).\n",
4613 scalar_dest
= gimple_assign_lhs (stmt
);
4614 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4616 /* Most operations cannot handle bit-precision types without extra
4618 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4619 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4620 /* Exception are bitwise binary operations. */
4621 && code
!= BIT_IOR_EXPR
4622 && code
!= BIT_XOR_EXPR
4623 && code
!= BIT_AND_EXPR
)
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4627 "bit-precision arithmetic not supported.\n");
4631 op0
= gimple_assign_rhs1 (stmt
);
4632 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4633 &def_stmt
, &def
, &dt
[0], &vectype
))
4635 if (dump_enabled_p ())
4636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4637 "use not simple.\n");
4640 /* If op0 is an external or constant def use a vector type with
4641 the same size as the output vector type. */
4643 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4645 gcc_assert (vectype
);
4648 if (dump_enabled_p ())
4650 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4651 "no vectype for scalar type ");
4652 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4654 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4660 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4661 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4662 if (nunits_out
!= nunits_in
)
4665 if (op_type
== binary_op
|| op_type
== ternary_op
)
4667 op1
= gimple_assign_rhs2 (stmt
);
4668 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4673 "use not simple.\n");
4677 if (op_type
== ternary_op
)
4679 op2
= gimple_assign_rhs3 (stmt
);
4680 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4685 "use not simple.\n");
4691 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4695 /* Multiple types in SLP are handled by creating the appropriate number of
4696 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4698 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4701 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4703 gcc_assert (ncopies
>= 1);
4705 /* Shifts are handled in vectorizable_shift (). */
4706 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4707 || code
== RROTATE_EXPR
)
4710 /* Supportable by target? */
4712 vec_mode
= TYPE_MODE (vectype
);
4713 if (code
== MULT_HIGHPART_EXPR
)
4715 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4716 icode
= LAST_INSN_CODE
;
4718 icode
= CODE_FOR_nothing
;
4722 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4725 if (dump_enabled_p ())
4726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4730 icode
= (int) optab_handler (optab
, vec_mode
);
4733 if (icode
== CODE_FOR_nothing
)
4735 if (dump_enabled_p ())
4736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4737 "op not supported by target.\n");
4738 /* Check only during analysis. */
4739 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4740 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4742 if (dump_enabled_p ())
4743 dump_printf_loc (MSG_NOTE
, vect_location
,
4744 "proceeding using word mode.\n");
4747 /* Worthwhile without SIMD support? Check only during analysis. */
4748 if (!VECTOR_MODE_P (vec_mode
)
4750 && vf
< vect_min_worthwhile_factor (code
))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4754 "not worthwhile without SIMD support.\n");
4758 if (!vec_stmt
) /* transformation not required. */
4760 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4761 if (dump_enabled_p ())
4762 dump_printf_loc (MSG_NOTE
, vect_location
,
4763 "=== vectorizable_operation ===\n");
4764 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE
, vect_location
,
4772 "transform binary/unary operation.\n");
4775 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
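
  /* A compressed instance of the scheme above (illustration only, the
     names are hypothetical): with VF=8 and V4SI (nunits=4) only two
     copies are needed, so the transform loop below runs with ncopies=2
     and emits

        VS2_0:  vz0 = vx0 + v1    RELATED_STMT = VS2_1, VEC_STMT of S2
        VS2_1:  vz1 = vx1 + v1    RELATED_STMT = -

     where vx0 and vx1 are found through STMT_VINFO_VEC_STMT and
     STMT_VINFO_RELATED_STMT of the statement defining x.  */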
4830 prev_stmt_info
= NULL
;
4831 for (j
= 0; j
< ncopies
; j
++)
4836 if (op_type
== binary_op
|| op_type
== ternary_op
)
4837 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4840 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4842 if (op_type
== ternary_op
)
4844 vec_oprnds2
.create (1);
4845 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4852 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4853 if (op_type
== ternary_op
)
4855 tree vec_oprnd
= vec_oprnds2
.pop ();
4856 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4861 /* Arguments are ready. Create the new vector stmt. */
4862 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4864 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4865 ? vec_oprnds1
[i
] : NULL_TREE
);
4866 vop2
= ((op_type
== ternary_op
)
4867 ? vec_oprnds2
[i
] : NULL_TREE
);
4868 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
4870 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4871 gimple_assign_set_lhs (new_stmt
, new_temp
);
4872 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4874 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4881 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4883 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4884 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4887 vec_oprnds0
.release ();
4888 vec_oprnds1
.release ();
4889 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		    slp_tree slp_node)
{
4949 tree vec_oprnd
= NULL_TREE
;
4950 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4951 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4952 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4954 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4955 struct loop
*loop
= NULL
;
4956 enum machine_mode vec_mode
;
4958 enum dr_alignment_support alignment_support_scheme
;
4961 enum vect_def_type dt
;
4962 stmt_vec_info prev_stmt_info
= NULL
;
4963 tree dataref_ptr
= NULL_TREE
;
4964 tree dataref_offset
= NULL_TREE
;
4965 gimple ptr_incr
= NULL
;
4966 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4969 gimple next_stmt
, first_stmt
= NULL
;
4970 bool grouped_store
= false;
4971 bool store_lanes_p
= false;
4972 unsigned int group_size
, i
;
4973 vec
<tree
> dr_chain
= vNULL
;
4974 vec
<tree
> oprnds
= vNULL
;
4975 vec
<tree
> result_chain
= vNULL
;
4977 bool negative
= false;
4978 tree offset
= NULL_TREE
;
4979 vec
<tree
> vec_oprnds
= vNULL
;
4980 bool slp
= (slp_node
!= NULL
);
4981 unsigned int vec_num
;
4982 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4986 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4988 /* Multiple types in SLP are handled by creating the appropriate number of
4989 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4991 if (slp
|| PURE_SLP_STMT (stmt_info
))
4994 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4996 gcc_assert (ncopies
>= 1);
4998 /* FORNOW. This restriction should be relaxed. */
4999 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5001 if (dump_enabled_p ())
5002 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5003 "multiple types in nested loop.\n");
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5010 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5013 /* Is vectorizable store? */
5015 if (!is_gimple_assign (stmt
))
5018 scalar_dest
= gimple_assign_lhs (stmt
);
5019 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5020 && is_pattern_stmt_p (stmt_info
))
5021 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5022 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5023 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5024 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5025 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5026 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5027 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5028 && TREE_CODE (scalar_dest
) != MEM_REF
)
5031 gcc_assert (gimple_assign_single_p (stmt
));
5032 op
= gimple_assign_rhs1 (stmt
);
5033 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5036 if (dump_enabled_p ())
5037 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5038 "use not simple.\n");
5042 elem_type
= TREE_TYPE (vectype
);
5043 vec_mode
= TYPE_MODE (vectype
);
5045 /* FORNOW. In some cases can vectorize even if data-type not supported
5046 (e.g. - array initialization with 0). */
5047 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5050 if (!STMT_VINFO_DATA_REF (stmt_info
))
5054 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5055 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5056 size_zero_node
) < 0;
5057 if (negative
&& ncopies
> 1)
5059 if (dump_enabled_p ())
5060 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5061 "multiple types with negative step.\n");
5067 gcc_assert (!grouped_store
);
5068 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5069 if (alignment_support_scheme
!= dr_aligned
5070 && alignment_support_scheme
!= dr_unaligned_supported
)
5072 if (dump_enabled_p ())
5073 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5074 "negative step but alignment required.\n");
5077 if (dt
!= vect_constant_def
5078 && dt
!= vect_external_def
5079 && !perm_mask_for_reverse (vectype
))
5081 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5083 "negative step and reversing not supported.\n");
5088 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5090 grouped_store
= true;
5091 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5092 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5094 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5095 if (vect_store_lanes_supported (vectype
, group_size
))
5096 store_lanes_p
= true;
5097 else if (!vect_grouped_store_supported (vectype
, group_size
))
5101 if (first_stmt
== stmt
)
5103 /* STMT is the leader of the group. Check the operands of all the
5104 stmts of the group. */
5105 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5108 gcc_assert (gimple_assign_single_p (next_stmt
));
5109 op
= gimple_assign_rhs1 (next_stmt
);
5110 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5111 &def_stmt
, &def
, &dt
))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5115 "use not simple.\n");
5118 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5123 if (!vec_stmt
) /* transformation not required. */
5125 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5126 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5133 ensure_base_align (stmt_info
, dr
);
5137 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5138 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5140 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5143 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5145 /* We vectorize all the stmts of the interleaving group when we
5146 reach the last stmt in the group. */
5147 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5148 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5157 grouped_store
= false;
5158 /* VEC_NUM is the number of vect stmts to be created for this
5160 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5161 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5162 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5163 op
= gimple_assign_rhs1 (first_stmt
);
5166 /* VEC_NUM is the number of vect stmts to be created for this
5168 vec_num
= group_size
;
5174 group_size
= vec_num
= 1;
5177 if (dump_enabled_p ())
5178 dump_printf_loc (MSG_NOTE
, vect_location
,
5179 "transform store. ncopies = %d\n", ncopies
);
5181 dr_chain
.create (group_size
);
5182 oprnds
.create (group_size
);
5184 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5185 gcc_assert (alignment_support_scheme
);
5186 /* Targets with store-lane instructions must not require explicit
5188 gcc_assert (!store_lanes_p
5189 || alignment_support_scheme
== dr_aligned
5190 || alignment_support_scheme
== dr_unaligned_supported
);
5193 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5196 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5198 aggr_type
= vectype
;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
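
  /* Concrete instance of the permutation above (hypothetical sizes): for
     a group of two V8HI stores, vect_permute_store_chain interleaves the
     two input vectors with the masks shown, so

       vx0 = { a0, a1, a2, a3, a4, a5, a6, a7 }
       vx3 = { b0, b1, b2, b3, b4, b5, b6, b7 }

     yield

       vx5 = { a0, b0, a1, b1, a2, b2, a3, b3 }  (mask {0,8,1,9,2,10,3,11})
       vx6 = { a4, b4, a5, b5, a6, b6, a7, b7 }  (mask {4,12,5,13,6,14,7,15})

     which are then stored to consecutive vector-sized slots.  */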
5239 prev_stmt_info
= NULL
;
5240 for (j
= 0; j
< ncopies
; j
++)
5248 /* Get vectorized arguments for SLP_NODE. */
5249 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5250 NULL
, slp_node
, -1);
5252 vec_oprnd
= vec_oprnds
[0];
5256 /* For interleaved stores we collect vectorized defs for all the
5257 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5258 used as an input to vect_permute_store_chain(), and OPRNDS as
5259 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5261 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5262 OPRNDS are of size 1. */
5263 next_stmt
= first_stmt
;
5264 for (i
= 0; i
< group_size
; i
++)
5266 /* Since gaps are not supported for interleaved stores,
5267 GROUP_SIZE is the exact number of stmts in the chain.
5268 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5269 there is no interleaving, GROUP_SIZE is 1, and only one
5270 iteration of the loop will be executed. */
5271 gcc_assert (next_stmt
5272 && gimple_assign_single_p (next_stmt
));
5273 op
= gimple_assign_rhs1 (next_stmt
);
5275 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5277 dr_chain
.quick_push (vec_oprnd
);
5278 oprnds
.quick_push (vec_oprnd
);
5279 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5283 /* We should have catched mismatched types earlier. */
5284 gcc_assert (useless_type_conversion_p (vectype
,
5285 TREE_TYPE (vec_oprnd
)));
5286 bool simd_lane_access_p
5287 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5288 if (simd_lane_access_p
5289 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5290 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5291 && integer_zerop (DR_OFFSET (first_dr
))
5292 && integer_zerop (DR_INIT (first_dr
))
5293 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5294 get_alias_set (DR_REF (first_dr
))))
5296 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5297 dataref_offset
= build_int_cst (reference_alias_ptr_type
5298 (DR_REF (first_dr
)), 0);
5303 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5304 simd_lane_access_p
? loop
: NULL
,
5305 offset
, &dummy
, gsi
, &ptr_incr
,
5306 simd_lane_access_p
, &inv_p
);
5307 gcc_assert (bb_vinfo
|| !inv_p
);
5311 /* For interleaved stores we created vectorized defs for all the
5312 defs stored in OPRNDS in the previous iteration (previous copy).
5313 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5314 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5316 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5317 OPRNDS are of size 1. */
5318 for (i
= 0; i
< group_size
; i
++)
5321 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5323 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5324 dr_chain
[i
] = vec_oprnd
;
5325 oprnds
[i
] = vec_oprnd
;
5329 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5330 TYPE_SIZE_UNIT (aggr_type
));
5332 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5333 TYPE_SIZE_UNIT (aggr_type
));
5340 /* Combine all the vectors into an array. */
5341 vec_array
= create_vector_array (vectype
, vec_num
);
5342 for (i
= 0; i
< vec_num
; i
++)
5344 vec_oprnd
= dr_chain
[i
];
5345 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5349 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5350 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5351 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5352 gimple_call_set_lhs (new_stmt
, data_ref
);
5353 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5361 result_chain
.create (group_size
);
5363 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5367 next_stmt
= first_stmt
;
5368 for (i
= 0; i
< vec_num
; i
++)
5370 unsigned align
, misalign
;
5373 /* Bump the vector pointer. */
5374 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5378 vec_oprnd
= vec_oprnds
[i
];
5379 else if (grouped_store
)
5380 /* For grouped stores vectorized defs are interleaved in
5381 vect_permute_store_chain(). */
5382 vec_oprnd
= result_chain
[i
];
5384 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5387 : build_int_cst (reference_alias_ptr_type
5388 (DR_REF (first_dr
)), 0));
5389 align
= TYPE_ALIGN_UNIT (vectype
);
5390 if (aligned_access_p (first_dr
))
5392 else if (DR_MISALIGNMENT (first_dr
) == -1)
5394 TREE_TYPE (data_ref
)
5395 = build_aligned_type (TREE_TYPE (data_ref
),
5396 TYPE_ALIGN (elem_type
));
5397 align
= TYPE_ALIGN_UNIT (elem_type
);
5402 TREE_TYPE (data_ref
)
5403 = build_aligned_type (TREE_TYPE (data_ref
),
5404 TYPE_ALIGN (elem_type
));
5405 misalign
= DR_MISALIGNMENT (first_dr
);
5407 if (dataref_offset
== NULL_TREE
)
5408 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5412 && dt
!= vect_constant_def
5413 && dt
!= vect_external_def
)
5415 tree perm_mask
= perm_mask_for_reverse (vectype
);
5417 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5419 tree new_temp
= make_ssa_name (perm_dest
, NULL
);
5421 /* Generate the permute statement. */
5423 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, new_temp
,
5424 vec_oprnd
, vec_oprnd
,
5426 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5428 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5429 vec_oprnd
= new_temp
;
5432 /* Arguments are ready. Create the new vector stmt. */
5433 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5434 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5439 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5447 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5449 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5450 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5454 dr_chain
.release ();
5456 result_chain
.release ();
5457 vec_oprnds
.release ();
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
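
/* Usage sketch (hypothetical caller, for illustration): to build a mask
   that swaps the two halves of a V4SI vector one would write

     unsigned char sel[4] = { 2, 3, 0, 1 };
     tree mask = vect_gen_perm_mask (v4si_vectype, sel);

   and, provided MASK is not NULL_TREE, feed it to permute_vec_elements
   (below) or use it directly as the third operand of a VEC_PERM_EXPR.
   The mask's element type is the integer type matching the vector's
   element mode, as computed via lang_hooks.types.type_for_mode above;
   v4si_vectype here stands for any vector type obtained from
   get_vectype_for_scalar_type.  */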
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
					    x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
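
/* Illustration (hypothetical GIMPLE): for an invariant load such as

     loop:
       _5 = n_2(D) + 1;
       _6 = a[_5];          <-- STMT passed to hoist_defs_of_uses

   the single in-loop def of _5 is neither a PHI nor dependent on other
   in-loop defs, so it is removed from the loop body and re-inserted on
   the preheader edge, after which STMT itself can be hoisted by the
   caller.  If _5 were defined by a PHI, or its operands were themselves
   defined inside the loop, the function would return false instead.  */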
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
5581 tree vec_dest
= NULL
;
5582 tree data_ref
= NULL
;
5583 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5584 stmt_vec_info prev_stmt_info
;
5585 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5586 struct loop
*loop
= NULL
;
5587 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5588 bool nested_in_vect_loop
= false;
5589 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5590 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5593 enum machine_mode mode
;
5594 gimple new_stmt
= NULL
;
5596 enum dr_alignment_support alignment_support_scheme
;
5597 tree dataref_ptr
= NULL_TREE
;
5598 tree dataref_offset
= NULL_TREE
;
5599 gimple ptr_incr
= NULL
;
5600 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5602 int i
, j
, group_size
, group_gap
;
5603 tree msq
= NULL_TREE
, lsq
;
5604 tree offset
= NULL_TREE
;
5605 tree byte_offset
= NULL_TREE
;
5606 tree realignment_token
= NULL_TREE
;
5608 vec
<tree
> dr_chain
= vNULL
;
5609 bool grouped_load
= false;
5610 bool load_lanes_p
= false;
5613 bool negative
= false;
5614 bool compute_in_loop
= false;
5615 struct loop
*at_loop
;
5617 bool slp
= (slp_node
!= NULL
);
5618 bool slp_perm
= false;
5619 enum tree_code code
;
5620 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5623 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5624 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5625 int gather_scale
= 1;
5626 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5630 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5631 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5632 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5637 /* Multiple types in SLP are handled by creating the appropriate number of
5638 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5640 if (slp
|| PURE_SLP_STMT (stmt_info
))
5643 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5645 gcc_assert (ncopies
>= 1);
5647 /* FORNOW. This restriction should be relaxed. */
5648 if (nested_in_vect_loop
&& ncopies
> 1)
5650 if (dump_enabled_p ())
5651 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5652 "multiple types in nested loop.\n");
5656 /* Invalidate assumptions made by dependence analysis when vectorization
5657 on the unrolled body effectively re-orders stmts. */
5659 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5660 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5661 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5663 if (dump_enabled_p ())
5664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5665 "cannot perform implicit CSE when unrolling "
5666 "with negative dependence distance\n");
5670 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5673 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5676 /* Is vectorizable load? */
5677 if (!is_gimple_assign (stmt
))
5680 scalar_dest
= gimple_assign_lhs (stmt
);
5681 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5684 code
= gimple_assign_rhs_code (stmt
);
5685 if (code
!= ARRAY_REF
5686 && code
!= BIT_FIELD_REF
5687 && code
!= INDIRECT_REF
5688 && code
!= COMPONENT_REF
5689 && code
!= IMAGPART_EXPR
5690 && code
!= REALPART_EXPR
5692 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5695 if (!STMT_VINFO_DATA_REF (stmt_info
))
5698 elem_type
= TREE_TYPE (vectype
);
5699 mode
= TYPE_MODE (vectype
);
5701 /* FORNOW. In some cases can vectorize even if data-type not supported
5702 (e.g. - data copies). */
5703 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5707 "Aligned load, but unsupported type.\n");
5711 /* Check if the load is a part of an interleaving chain. */
5712 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5714 grouped_load
= true;
5716 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5718 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5719 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5721 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5722 if (vect_load_lanes_supported (vectype
, group_size
))
5723 load_lanes_p
= true;
5724 else if (!vect_grouped_load_supported (vectype
, group_size
))
5728 /* Invalidate assumptions made by dependence analysis when vectorization
5729 on the unrolled body effectively re-orders stmts. */
5730 if (!PURE_SLP_STMT (stmt_info
)
5731 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5732 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5733 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5737 "cannot perform implicit CSE when performing "
5738 "group loads with negative dependence distance\n");
5744 if (STMT_VINFO_GATHER_P (stmt_info
))
5748 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5749 &gather_off
, &gather_scale
);
5750 gcc_assert (gather_decl
);
5751 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5752 &def_stmt
, &def
, &gather_dt
,
5753 &gather_off_vectype
))
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5757 "gather index use not simple.\n");
5761 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5765 negative
= tree_int_cst_compare (nested_in_vect_loop
5766 ? STMT_VINFO_DR_STEP (stmt_info
)
5768 size_zero_node
) < 0;
5769 if (negative
&& ncopies
> 1)
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5773 "multiple types with negative step.\n");
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5783 "negative step for group load not supported"
5787 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5788 if (alignment_support_scheme
!= dr_aligned
5789 && alignment_support_scheme
!= dr_unaligned_supported
)
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5793 "negative step but alignment required.\n");
5796 if (!perm_mask_for_reverse (vectype
))
5798 if (dump_enabled_p ())
5799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5800 "negative step and reversing not supported."
5807 if (!vec_stmt
) /* transformation not required. */
5809 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
5810 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
5814 if (dump_enabled_p ())
5815 dump_printf_loc (MSG_NOTE
, vect_location
,
5816 "transform load. ncopies = %d\n", ncopies
);
5820 ensure_base_align (stmt_info
, dr
);
5822 if (STMT_VINFO_GATHER_P (stmt_info
))
5824 tree vec_oprnd0
= NULL_TREE
, op
;
5825 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
5826 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5827 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
5828 edge pe
= loop_preheader_edge (loop
);
5831 enum { NARROW
, NONE
, WIDEN
} modifier
;
5832 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
5834 if (nunits
== gather_off_nunits
)
5836 else if (nunits
== gather_off_nunits
/ 2)
5838 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
5841 for (i
= 0; i
< gather_off_nunits
; ++i
)
5842 sel
[i
] = i
| nunits
;
5844 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
5845 gcc_assert (perm_mask
!= NULL_TREE
);
5847 else if (nunits
== gather_off_nunits
* 2)
5849 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5852 for (i
= 0; i
< nunits
; ++i
)
5853 sel
[i
] = i
< gather_off_nunits
5854 ? i
: i
+ nunits
- gather_off_nunits
;
5856 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
5857 gcc_assert (perm_mask
!= NULL_TREE
);
5863 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
5864 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5865 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5866 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5867 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5868 scaletype
= TREE_VALUE (arglist
);
5869 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
5871 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5873 ptr
= fold_convert (ptrtype
, gather_base
);
5874 if (!is_gimple_min_invariant (ptr
))
5876 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5877 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5878 gcc_assert (!new_bb
);
5881 /* Currently we support only unconditional gather loads,
5882 so mask should be all ones. */
5883 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
5884 mask
= build_int_cst (masktype
, -1);
5885 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
5887 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
5888 mask
= build_vector_from_val (masktype
, mask
);
5889 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5891 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
5895 for (j
= 0; j
< 6; ++j
)
5897 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
5898 mask
= build_real (TREE_TYPE (masktype
), r
);
5899 mask
= build_vector_from_val (masktype
, mask
);
5900 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5905 scale
= build_int_cst (scaletype
, gather_scale
);
5907 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
5908 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
5909 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
5913 for (j
= 0; j
< 6; ++j
)
5915 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
5916 merge
= build_real (TREE_TYPE (rettype
), r
);
5920 merge
= build_vector_from_val (rettype
, merge
);
5921 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
5923 prev_stmt_info
= NULL
;
5924 for (j
= 0; j
< ncopies
; ++j
)
5926 if (modifier
== WIDEN
&& (j
& 1))
5927 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
5928 perm_mask
, stmt
, gsi
);
5931 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
5934 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
5936 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5938 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5939 == TYPE_VECTOR_SUBPARTS (idxtype
));
5940 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5941 var
= make_ssa_name (var
, NULL
);
5942 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5944 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
5946 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5951 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
5953 if (!useless_type_conversion_p (vectype
, rettype
))
5955 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
5956 == TYPE_VECTOR_SUBPARTS (rettype
));
5957 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
5958 op
= make_ssa_name (var
, new_stmt
);
5959 gimple_call_set_lhs (new_stmt
, op
);
5960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5961 var
= make_ssa_name (vec_dest
, NULL
);
5962 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
5964 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
5969 var
= make_ssa_name (vec_dest
, new_stmt
);
5970 gimple_call_set_lhs (new_stmt
, var
);
5973 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5975 if (modifier
== NARROW
)
5982 var
= permute_vec_elements (prev_res
, var
,
5983 perm_mask
, stmt
, gsi
);
5984 new_stmt
= SSA_NAME_DEF_STMT (var
);
5987 if (prev_stmt_info
== NULL
)
5988 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5990 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5991 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5995 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5997 gimple_stmt_iterator incr_gsi
;
6003 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6004 gimple_seq stmts
= NULL
;
6005 tree stride_base
, stride_step
, alias_off
;
6007 gcc_assert (!nested_in_vect_loop
);
6010 = fold_build_pointer_plus
6011 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6012 size_binop (PLUS_EXPR
,
6013 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6014 convert_to_ptrofftype (DR_INIT (dr
))));
6015 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...  */
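
      /* Concrete instance (hypothetical): with stride 3, V4SI and VF=4
	 the induction variable created below advances by 4*3 elements
	 per vector iteration and each copy gathers four scalars

	   tmp0 = array[j];
	   tmp1 = array[j + 3];
	   tmp2 = array[j + 6];
	   tmp3 = array[j + 9];
	   vectemp = {tmp0, tmp1, tmp2, tmp3};

	 assembled via the CONSTRUCTOR built by the loop over nunits.  */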
6033 ivstep
= stride_step
;
6034 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6035 build_int_cst (TREE_TYPE (ivstep
), vf
));
6037 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6039 create_iv (stride_base
, ivstep
, NULL
,
6040 loop
, &incr_gsi
, insert_after
,
6042 incr
= gsi_stmt (incr_gsi
);
6043 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6045 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6047 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6049 prev_stmt_info
= NULL
;
6050 running_off
= offvar
;
6051 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6052 for (j
= 0; j
< ncopies
; j
++)
6056 vec_alloc (v
, nunits
);
6057 for (i
= 0; i
< nunits
; i
++)
6059 tree newref
, newoff
;
6061 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
6062 running_off
, alias_off
);
6064 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6067 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6068 newoff
= copy_ssa_name (running_off
, NULL
);
6069 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
6070 running_off
, stride_step
);
6071 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6073 running_off
= newoff
;
6076 vec_inv
= build_constructor (vectype
, v
);
6077 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6078 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6081 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6083 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6084 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6091 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6093 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6094 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6095 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6097 /* Check if the chain of loads is already vectorized. */
6098 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6099 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6100 ??? But we can only do so if there is exactly one
6101 as we have no way to get at the rest. Leave the CSE
6103 ??? With the group load eventually participating
6104 in multiple different permutations (having multiple
6105 slp nodes which refer to the same group) the CSE
6106 is even wrong code. See PR56270. */
6109 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6112 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6113 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6115 /* VEC_NUM is the number of vect stmts to be created for this group. */
6118 grouped_load
= false;
6119 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6120 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6122 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6126 vec_num
= group_size
;
6134 group_size
= vec_num
= 1;
6138 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6139 gcc_assert (alignment_support_scheme
);
6140 /* Targets with load-lane instructions must not require explicit
6142 gcc_assert (!load_lanes_p
6143 || alignment_support_scheme
== dr_aligned
6144 || alignment_support_scheme
== dr_unaligned_supported
);
6146 /* In case the vectorization factor (VF) is bigger than the number
6147 of elements that we can fit in a vectype (nunits), we have to generate
6148 more than one vector stmt - i.e - we need to "unroll" the
6149 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6150 from one copy of the vector stmt to the next, in the field
6151 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6152 stages to find the correct vector defs to be used when vectorizing
6153 stmts that use the defs of the current stmt. The example below
6154 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6155 need to create 4 vectorized stmts):
6157 before vectorization:
6158 RELATED_STMT VEC_STMT
6162 step 1: vectorize stmt S1:
6163 We first create the vector stmt VS1_0, and, as usual, record a
6164 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6165 Next, we create the vector stmt VS1_1, and record a pointer to
6166 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6167 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6169 RELATED_STMT VEC_STMT
6170 VS1_0: vx0 = memref0 VS1_1 -
6171 VS1_1: vx1 = memref1 VS1_2 -
6172 VS1_2: vx2 = memref2 VS1_3 -
6173 VS1_3: vx3 = memref3 - -
6174 S1: x = load - VS1_0
6177 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6178 information we recorded in RELATED_STMT field is used to vectorize
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
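
  /* Concrete instance of the extraction masks above (hypothetical V8HI
     group of two loads): with

       vx0 = { a0, b0, a1, b1, a2, b2, a3, b3 }
       vx1 = { a4, b4, a5, b5, a6, b6, a7, b7 }

     vect_permute_load_chain produces

       vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6, 8, 10, 12, 14 }>
	   = { a0, a1, a2, a3, a4, a5, a6, a7 }
       vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7, 9, 11, 13, 15 }>
	   = { b0, b1, b2, b3, b4, b5, b6, b7 }

     de-interleaving the two scalar access streams of the group.  */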
6214 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6215 on a target that supports unaligned accesses (dr_unaligned_supported)
6216 we generate the following code:
6220 p = p + indx * vectype_size;
6225 Otherwise, the data reference is potentially unaligned on a target that
6226 does not support unaligned accesses (dr_explicit_realign_optimized) -
6227 then generate the following code, in which the data in each iteration is
6228 obtained by two vector loads, one from the previous iteration, and one
6229 from the current iteration:
6231 msq_init = *(floor(p1))
6232 p2 = initial_addr + VS - 1;
6233 realignment_token = call target_builtin;
6236 p2 = p2 + indx * vectype_size
6238 vec_dest = realign_load (msq, lsq, realignment_token)
6243 /* If the misalignment remains the same throughout the execution of the
6244 loop, we can create the init_addr and permutation mask at the loop
6245 preheader. Otherwise, it needs to be created inside the loop.
6246 This can only occur when vectorizing memory accesses in the inner-loop
6247 nested within an outer-loop that is being vectorized. */
6249 if (nested_in_vect_loop
6250 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6251 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6253 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6254 compute_in_loop
= true;
  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = SSA_NAME_DEF_STMT (msq);
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	}
    }
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  byte_offset);
	}
      else if (dataref_offset)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					  TYPE_SIZE_UNIT (aggr_type));
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));
      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);
	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt, dr_chain);
6348 for (i
= 0; i
< vec_num
; i
++)
6351 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6354 /* 2. Create the vector-load in the loop. */
6355 switch (alignment_support_scheme
)
6358 case dr_unaligned_supported
:
6360 unsigned int align
, misalign
;
6363 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6366 : build_int_cst (reference_alias_ptr_type
6367 (DR_REF (first_dr
)), 0));
6368 align
= TYPE_ALIGN_UNIT (vectype
);
6369 if (alignment_support_scheme
== dr_aligned
)
6371 gcc_assert (aligned_access_p (first_dr
));
6374 else if (DR_MISALIGNMENT (first_dr
) == -1)
6376 TREE_TYPE (data_ref
)
6377 = build_aligned_type (TREE_TYPE (data_ref
),
6378 TYPE_ALIGN (elem_type
));
6379 align
= TYPE_ALIGN_UNIT (elem_type
);
6384 TREE_TYPE (data_ref
)
6385 = build_aligned_type (TREE_TYPE (data_ref
),
6386 TYPE_ALIGN (elem_type
));
6387 misalign
= DR_MISALIGNMENT (first_dr
);
6389 if (dataref_offset
== NULL_TREE
)
6390 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6394 case dr_explicit_realign
:
6399 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6401 if (compute_in_loop
)
6402 msq
= vect_setup_realignment (first_stmt
, gsi
,
6404 dr_explicit_realign
,
6407 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
6408 new_stmt
= gimple_build_assign_with_ops
6409 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
6411 (TREE_TYPE (dataref_ptr
),
6412 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6413 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6415 = build2 (MEM_REF
, vectype
, ptr
,
6416 build_int_cst (reference_alias_ptr_type
6417 (DR_REF (first_dr
)), 0));
6418 vec_dest
= vect_create_destination_var (scalar_dest
,
6420 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6421 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6422 gimple_assign_set_lhs (new_stmt
, new_temp
);
6423 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6424 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6425 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6428 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6429 TYPE_SIZE_UNIT (elem_type
));
6430 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6431 new_stmt
= gimple_build_assign_with_ops
6432 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
6435 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6436 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6437 gimple_assign_set_lhs (new_stmt
, ptr
);
6438 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6440 = build2 (MEM_REF
, vectype
, ptr
,
6441 build_int_cst (reference_alias_ptr_type
6442 (DR_REF (first_dr
)), 0));
6445 case dr_explicit_realign_optimized
:
6446 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
6447 new_stmt
= gimple_build_assign_with_ops
6448 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
6450 (TREE_TYPE (dataref_ptr
),
6451 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6452 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6454 = build2 (MEM_REF
, vectype
, new_temp
,
6455 build_int_cst (reference_alias_ptr_type
6456 (DR_REF (first_dr
)), 0));
6461 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6462 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6463 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6464 gimple_assign_set_lhs (new_stmt
, new_temp
);
6465 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
	  /* 3. Handle explicit realignment if necessary/supported.
	     Create in loop:
	       vec_dest = realign_load (msq, lsq, realignment_token)  */
6470 if (alignment_support_scheme
== dr_explicit_realign_optimized
6471 || alignment_support_scheme
== dr_explicit_realign
)
6473 lsq
= gimple_assign_lhs (new_stmt
);
6474 if (!realignment_token
)
6475 realignment_token
= dataref_ptr
;
6476 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6478 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
6481 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6482 gimple_assign_set_lhs (new_stmt
, new_temp
);
6483 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6485 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6488 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6489 add_phi_arg (phi
, lsq
,
6490 loop_latch_edge (containing_loop
),
6496 /* 4. Handle invariant-load. */
6497 if (inv_p
&& !bb_vinfo
)
6499 gcc_assert (!grouped_load
);
6500 /* If we have versioned for aliasing or the loop doesn't
6501 have any data dependencies that would preclude this,
6502 then we are sure this is a loop invariant load and
6503 thus we can insert it on the preheader edge. */
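	      /* For example (illustration only): in
		   for (i = 0; i < n; i++) a[i] = *p + 1;
		 with no aliasing stores to *p inside the loop, the scalar
		 load of *p can be emitted once on the preheader edge and
		 then broadcast into a vector by vect_init_vector below.  */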
6504 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6505 && !nested_in_vect_loop
6506 && hoist_defs_of_uses (stmt
, loop
))
6508 if (dump_enabled_p ())
6510 dump_printf_loc (MSG_NOTE
, vect_location
,
6511 "hoisting out of the vectorized "
6513 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6514 dump_printf (MSG_NOTE
, "\n");
6516 tree tem
= copy_ssa_name (scalar_dest
, NULL
);
6517 gsi_insert_on_edge_immediate
6518 (loop_preheader_edge (loop
),
6519 gimple_build_assign (tem
,
6521 (gimple_assign_rhs1 (stmt
))));
6522 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6526 gimple_stmt_iterator gsi2
= *gsi
;
6528 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6531 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6532 set_vinfo_for_stmt (new_stmt
,
6533 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6539 tree perm_mask
= perm_mask_for_reverse (vectype
);
6540 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6541 perm_mask
, stmt
, gsi
);
6542 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6545 /* Collect vector loads and later create their permutation in
6546 vect_transform_grouped_load (). */
6547 if (grouped_load
|| slp_perm
)
6548 dr_chain
.quick_push (new_temp
);
6550 /* Store vector loads in the corresponding SLP_NODE. */
6551 if (slp
&& !slp_perm
)
6552 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6554 /* Bump the vector pointer to account for a gap. */
6555 if (slp
&& group_gap
!= 0)
6557 tree bump
= size_binop (MULT_EXPR
,
6558 TYPE_SIZE_UNIT (elem_type
),
6559 size_int (group_gap
));
6560 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6565 if (slp
&& !slp_perm
)
6570 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6571 slp_node_instance
, false))
6573 dr_chain
.release ();
6582 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6583 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6588 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6590 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6591 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6594 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
		     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				 &lhs_def_stmt, &def, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
				 &rhs_def_stmt, &def, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
6653 /* vectorizable_condition.
   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.
6660 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6661 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).
6664 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
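/* Illustration only (not taken from this file): for a scalar statement

     S:  x = a < b ? c : d;

   the transformation described above emits, per vector copy, something of
   the form

     vcmp = va < vb;                       // built with build2 (LT_EXPR, ...)
     vx   = VEC_COND_EXPR <vcmp, vc, vd>;  // built with build3 (VEC_COND_EXPR, ...)

   where va/vb/vc/vd are the vectorized defs of the four scalar operands.  */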
6667 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6668 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6671 tree scalar_dest
= NULL_TREE
;
6672 tree vec_dest
= NULL_TREE
;
6673 tree cond_expr
, then_clause
, else_clause
;
6674 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6675 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6676 tree comp_vectype
= NULL_TREE
;
6677 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6678 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6679 tree vec_compare
, vec_cond_expr
;
6681 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6683 enum vect_def_type dt
, dts
[4];
6684 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6686 enum tree_code code
;
6687 stmt_vec_info prev_stmt_info
= NULL
;
6689 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6690 vec
<tree
> vec_oprnds0
= vNULL
;
6691 vec
<tree
> vec_oprnds1
= vNULL
;
6692 vec
<tree
> vec_oprnds2
= vNULL
;
6693 vec
<tree
> vec_oprnds3
= vNULL
;
6696 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6699 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6701 gcc_assert (ncopies
>= 1);
6702 if (reduc_index
&& ncopies
> 1)
6703 return false; /* FORNOW */
6705 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6708 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6711 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6712 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6716 /* FORNOW: not yet supported. */
6717 if (STMT_VINFO_LIVE_P (stmt_info
))
6719 if (dump_enabled_p ())
6720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6721 "value used after loop.\n");
  /* Is this a vectorizable conditional operation?  */
6726 if (!is_gimple_assign (stmt
))
6729 code
= gimple_assign_rhs_code (stmt
);
6731 if (code
!= COND_EXPR
)
6734 cond_expr
= gimple_assign_rhs1 (stmt
);
6735 then_clause
= gimple_assign_rhs2 (stmt
);
6736 else_clause
= gimple_assign_rhs3 (stmt
);
6738 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
6743 if (TREE_CODE (then_clause
) == SSA_NAME
)
6745 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6746 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6747 &then_def_stmt
, &def
, &dt
))
6750 else if (TREE_CODE (then_clause
) != INTEGER_CST
6751 && TREE_CODE (then_clause
) != REAL_CST
6752 && TREE_CODE (then_clause
) != FIXED_CST
)
6755 if (TREE_CODE (else_clause
) == SSA_NAME
)
6757 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6758 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6759 &else_def_stmt
, &def
, &dt
))
6762 else if (TREE_CODE (else_clause
) != INTEGER_CST
6763 && TREE_CODE (else_clause
) != REAL_CST
6764 && TREE_CODE (else_clause
) != FIXED_CST
)
6767 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
  /* The result of a vector comparison should be of signed integer type.  */
6769 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
6770 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
6771 if (vec_cmp_type
== NULL_TREE
)
6776 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6777 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
6784 vec_oprnds0
.create (1);
6785 vec_oprnds1
.create (1);
6786 vec_oprnds2
.create (1);
6787 vec_oprnds3
.create (1);
6791 scalar_dest
= gimple_assign_lhs (stmt
);
6792 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6794 /* Handle cond expr. */
6795 for (j
= 0; j
< ncopies
; j
++)
6797 gimple_assign new_stmt
= NULL
;
6802 auto_vec
<tree
, 4> ops
;
6803 auto_vec
<vec
<tree
>, 4> vec_defs
;
6805 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
6806 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
6807 ops
.safe_push (then_clause
);
6808 ops
.safe_push (else_clause
);
6809 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
6810 vec_oprnds3
= vec_defs
.pop ();
6811 vec_oprnds2
= vec_defs
.pop ();
6812 vec_oprnds1
= vec_defs
.pop ();
6813 vec_oprnds0
= vec_defs
.pop ();
6816 vec_defs
.release ();
6822 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
6824 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
6825 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
6828 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
6830 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
6831 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
6832 if (reduc_index
== 1)
6833 vec_then_clause
= reduc_def
;
6836 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
6838 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
6839 NULL
, >emp
, &def
, &dts
[2]);
6841 if (reduc_index
== 2)
6842 vec_else_clause
= reduc_def
;
6845 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
6847 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
6848 NULL
, >emp
, &def
, &dts
[3]);
6854 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
6855 vec_oprnds0
.pop ());
6856 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
6857 vec_oprnds1
.pop ());
6858 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
6859 vec_oprnds2
.pop ());
6860 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
6861 vec_oprnds3
.pop ());
6866 vec_oprnds0
.quick_push (vec_cond_lhs
);
6867 vec_oprnds1
.quick_push (vec_cond_rhs
);
6868 vec_oprnds2
.quick_push (vec_then_clause
);
6869 vec_oprnds3
.quick_push (vec_else_clause
);
6872 /* Arguments are ready. Create the new vector stmt. */
6873 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
6875 vec_cond_rhs
= vec_oprnds1
[i
];
6876 vec_then_clause
= vec_oprnds2
[i
];
6877 vec_else_clause
= vec_oprnds3
[i
];
6879 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
6880 vec_cond_lhs
, vec_cond_rhs
);
6881 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6882 vec_compare
, vec_then_clause
, vec_else_clause
);
6884 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6885 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6886 gimple_assign_set_lhs (new_stmt
, new_temp
);
6887 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6889 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6896 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6898 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6900 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6903 vec_oprnds0
.release ();
6904 vec_oprnds1
.release ();
6905 vec_oprnds2
.release ();
6906 vec_oprnds3
.release ();
6912 /* Make sure the statement is vectorizable. */
6915 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
6917 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6918 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6919 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
6921 tree scalar_type
, vectype
;
6922 gimple pattern_stmt
;
6923 gimple_seq pattern_def_seq
;
6925 if (dump_enabled_p ())
6927 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
6928 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6929 dump_printf (MSG_NOTE
, "\n");
6932 if (gimple_has_volatile_ops (stmt
))
6934 if (dump_enabled_p ())
6935 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6936 "not vectorized: stmt has volatile operands\n");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
6943 - the COND_EXPR which is the loop exit condition
6944 - any LABEL_EXPRs in the loop
6945 - computations that are used only for array indexing or loop control.
6946 In basic blocks we only analyze statements that are a part of some SLP
6947 instance, therefore, all the statements are relevant.
6949 Pattern statement needs to be analyzed instead of the original statement
6950 if the original statement is not relevant. Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, so we don't analyze pattern stmts here; the pattern stmts
     will already be part of the SLP instance.  */
6955 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
6956 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
6957 && !STMT_VINFO_LIVE_P (stmt_info
))
6959 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6961 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6962 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6964 /* Analyze PATTERN_STMT instead of the original stmt. */
6965 stmt
= pattern_stmt
;
6966 stmt_info
= vinfo_for_stmt (pattern_stmt
);
6967 if (dump_enabled_p ())
6969 dump_printf_loc (MSG_NOTE
, vect_location
,
6970 "==> examining pattern statement: ");
6971 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6972 dump_printf (MSG_NOTE
, "\n");
6977 if (dump_enabled_p ())
6978 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
6983 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6986 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6987 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6989 /* Analyze PATTERN_STMT too. */
6990 if (dump_enabled_p ())
6992 dump_printf_loc (MSG_NOTE
, vect_location
,
6993 "==> examining pattern statement: ");
6994 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6995 dump_printf (MSG_NOTE
, "\n");
6998 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7002 if (is_pattern_stmt_p (stmt_info
)
7004 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7006 gimple_stmt_iterator si
;
7008 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7010 gimple pattern_def_stmt
= gsi_stmt (si
);
7011 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7012 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7014 /* Analyze def stmt of STMT if it's a pattern stmt. */
7015 if (dump_enabled_p ())
7017 dump_printf_loc (MSG_NOTE
, vect_location
,
7018 "==> examining pattern def statement: ");
7019 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7020 dump_printf (MSG_NOTE
, "\n");
7023 if (!vect_analyze_stmt (pattern_def_stmt
,
7024 need_to_vectorize
, node
))
7030 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7032 case vect_internal_def
:
7035 case vect_reduction_def
:
7036 case vect_nested_cycle
:
7037 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7038 || relevance
== vect_used_in_outer_by_reduction
7039 || relevance
== vect_unused_in_scope
));
7042 case vect_induction_def
:
7043 case vect_constant_def
:
7044 case vect_external_def
:
7045 case vect_unknown_def_type
:
7052 gcc_assert (PURE_SLP_STMT (stmt_info
));
7054 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7055 if (dump_enabled_p ())
7057 dump_printf_loc (MSG_NOTE
, vect_location
,
7058 "get vectype for scalar type: ");
7059 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7060 dump_printf (MSG_NOTE
, "\n");
7063 vectype
= get_vectype_for_scalar_type (scalar_type
);
7066 if (dump_enabled_p ())
7068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7069 "not SLPed: unsupported data-type ");
7070 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7072 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7077 if (dump_enabled_p ())
7079 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7080 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7081 dump_printf (MSG_NOTE
, "\n");
7084 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7087 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7089 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7090 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7091 || (is_gimple_call (stmt
)
7092 && gimple_call_lhs (stmt
) == NULL_TREE
));
7093 *need_to_vectorize
= true;
7098 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7099 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7100 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7101 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7102 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7103 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7104 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7105 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7106 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7107 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7108 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7109 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7113 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7114 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7115 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7116 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7117 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7118 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7119 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7120 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7121 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7126 if (dump_enabled_p ())
7128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7129 "not vectorized: relevant stmt not ");
7130 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7131 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7132 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7141 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7142 need extra handling, except for vectorizable reductions. */
7143 if (STMT_VINFO_LIVE_P (stmt_info
)
7144 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7145 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7149 if (dump_enabled_p ())
7151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7152 "not vectorized: live stmt not ");
7153 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7154 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7155 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7165 /* Function vect_transform_stmt.
7167 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7170 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7171 bool *grouped_store
, slp_tree slp_node
,
7172 slp_instance slp_node_instance
)
7174 bool is_store
= false;
7175 gimple vec_stmt
= NULL
;
7176 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7179 switch (STMT_VINFO_TYPE (stmt_info
))
7181 case type_demotion_vec_info_type
:
7182 case type_promotion_vec_info_type
:
7183 case type_conversion_vec_info_type
:
7184 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7188 case induc_vec_info_type
:
7189 gcc_assert (!slp_node
);
7190 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7194 case shift_vec_info_type
:
7195 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7199 case op_vec_info_type
:
7200 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7204 case assignment_vec_info_type
:
7205 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7209 case load_vec_info_type
:
7210 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7215 case store_vec_info_type
:
7216 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7218 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	/* In case of interleaving, the whole chain is vectorized when the
	   last store in the chain is reached.  Store stmts before the last
	   one are skipped, and their vec_stmt_info shouldn't be freed
	   meanwhile.  */
7224 *grouped_store
= true;
7225 if (STMT_VINFO_VEC_STMT (stmt_info
))
7232 case condition_vec_info_type
:
7233 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7237 case call_vec_info_type
:
7238 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7239 stmt
= gsi_stmt (*gsi
);
7240 if (is_gimple_call (stmt
)
7241 && gimple_call_internal_p (stmt
)
7242 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7246 case call_simd_clone_vec_info_type
:
7247 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7248 stmt
= gsi_stmt (*gsi
);
7251 case reduc_vec_info_type
:
7252 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7257 if (!STMT_VINFO_LIVE_P (stmt_info
))
7259 if (dump_enabled_p ())
7260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7261 "stmt not supported.\n");
7266 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7267 is being vectorized, but outside the immediately enclosing loop. */
7269 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7270 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7271 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7272 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7273 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7274 || STMT_VINFO_RELEVANT (stmt_info
) ==
7275 vect_used_in_outer_by_reduction
))
7277 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7278 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7279 imm_use_iterator imm_iter
;
7280 use_operand_p use_p
;
7284 if (dump_enabled_p ())
7285 dump_printf_loc (MSG_NOTE
, vect_location
,
7286 "Record the vdef for outer-loop vectorization.\n");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
7291 if (gimple_code (stmt
) == GIMPLE_PHI
)
7292 scalar_dest
= PHI_RESULT (stmt
);
7294 scalar_dest
= gimple_assign_lhs (stmt
);
7296 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7298 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7300 exit_phi
= USE_STMT (use_p
);
7301 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7306 /* Handle stmts whose DEF is used outside the loop-nest that is
7307 being vectorized. */
7308 if (STMT_VINFO_LIVE_P (stmt_info
)
7309 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7311 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7316 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
7350 /* Function new_stmt_vec_info.
7352 Create and initialize a new stmt_vec_info struct for STMT. */
7355 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7356 bb_vec_info bb_vinfo
)
7359 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7361 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7362 STMT_VINFO_STMT (res
) = stmt
;
7363 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7364 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7365 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7366 STMT_VINFO_LIVE_P (res
) = false;
7367 STMT_VINFO_VECTYPE (res
) = NULL
;
7368 STMT_VINFO_VEC_STMT (res
) = NULL
;
7369 STMT_VINFO_VECTORIZABLE (res
) = true;
7370 STMT_VINFO_IN_PATTERN_P (res
) = false;
7371 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7372 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7373 STMT_VINFO_DATA_REF (res
) = NULL
;
7375 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7376 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7377 STMT_VINFO_DR_INIT (res
) = NULL
;
7378 STMT_VINFO_DR_STEP (res
) = NULL
;
7379 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7381 if (gimple_code (stmt
) == GIMPLE_PHI
7382 && is_loop_header_bb_p (gimple_bb (stmt
)))
7383 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7385 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7387 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7388 STMT_SLP_TYPE (res
) = loop_vect
;
7389 GROUP_FIRST_ELEMENT (res
) = NULL
;
7390 GROUP_NEXT_ELEMENT (res
) = NULL
;
7391 GROUP_SIZE (res
) = 0;
7392 GROUP_STORE_COUNT (res
) = 0;
7393 GROUP_GAP (res
) = 0;
7394 GROUP_SAME_DR_STMT (res
) = NULL
;
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;

  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
7425 /* Free stmt vectorization related info. */
7428 free_stmt_vec_info (gimple stmt
)
7430 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7435 /* Check if this statement has a related "pattern stmt"
7436 (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
7439 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7441 stmt_vec_info patt_info
7442 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7445 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7446 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7447 gimple_set_bb (patt_stmt
, NULL
);
7448 tree lhs
= gimple_get_lhs (patt_stmt
);
7449 if (TREE_CODE (lhs
) == SSA_NAME
)
7450 release_ssa_name (lhs
);
7453 gimple_stmt_iterator si
;
7454 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7456 gimple seq_stmt
= gsi_stmt (si
);
7457 gimple_set_bb (seq_stmt
, NULL
);
7458 lhs
= gimple_get_lhs (patt_stmt
);
7459 if (TREE_CODE (lhs
) == SSA_NAME
)
7460 release_ssa_name (lhs
);
7461 free_stmt_vec_info (seq_stmt
);
7464 free_stmt_vec_info (patt_stmt
);
7468 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7469 set_vinfo_for_stmt (stmt
, NULL
);
7474 /* Function get_vectype_for_scalar_type_and_size.
   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */
7480 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7482 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7483 enum machine_mode simd_mode
;
7484 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7491 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7492 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
7498 they support the proper result truncation/extension.
7499 We also make sure to build vector types with INTEGER_TYPE
7500 component type only. */
7501 if (INTEGRAL_TYPE_P (scalar_type
)
7502 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7503 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7504 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7505 TYPE_UNSIGNED (scalar_type
));
7507 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7508 When the component mode passes the above test simply use a type
7509 corresponding to that mode. The theory is that any use that
7510 would cause problems with this will disable vectorization anyway. */
7511 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7512 && !INTEGRAL_TYPE_P (scalar_type
))
7513 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
7517 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7518 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7519 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
7523 if (scalar_type
== NULL_TREE
)
7526 /* If no size was supplied use the mode the target prefers. Otherwise
7527 lookup a vector mode of the specified size. */
7529 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7531 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7532 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7536 vectype
= build_vector_type (scalar_type
, nunits
);
7538 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7539 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7545 unsigned int current_vector_size
;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;

  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
7580 BB_VINFO - the vect info of the basic block that is being vectorized.
7581 OPERAND - operand of STMT in the loop or bb.
7582 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7584 Returns whether a stmt with OPERAND can be vectorized.
7585 For loops, supportable operands are constants, loop invariants, and operands
7586 that are defined by the current iteration of the loop. Unsupportable
7587 operands are those that are defined by a previous iteration of the loop (as
7588 is the case in reduction/induction computations).
7589 For basic blocks, supportable operands are constants and bb invariants.
7590 For now, operands defined outside the basic block are not supported. */
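/* For example (illustration only): in

     for (i = 0; i < n; i++)
       s += b[i] * x;

   when the multiply-add is analyzed, the def of b[i] comes from a load in
   the current iteration (an internal def), x is defined before the loop
   (an external def), a literal operand would be a constant def, and the use
   of s is defined by the previous iteration, which is the reduction case the
   comment above refers to.  */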
7593 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7594 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7595 tree
*def
, enum vect_def_type
*dt
)
7598 stmt_vec_info stmt_vinfo
;
7599 struct loop
*loop
= NULL
;
7602 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7607 if (dump_enabled_p ())
7609 dump_printf_loc (MSG_NOTE
, vect_location
,
7610 "vect_is_simple_use: operand ");
7611 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7612 dump_printf (MSG_NOTE
, "\n");
7615 if (CONSTANT_CLASS_P (operand
))
7617 *dt
= vect_constant_def
;
7621 if (is_gimple_min_invariant (operand
))
7624 *dt
= vect_external_def
;
7628 if (TREE_CODE (operand
) == PAREN_EXPR
)
7630 if (dump_enabled_p ())
7631 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7632 operand
= TREE_OPERAND (operand
, 0);
7635 if (TREE_CODE (operand
) != SSA_NAME
)
7637 if (dump_enabled_p ())
7638 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7643 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7644 if (*def_stmt
== NULL
)
7646 if (dump_enabled_p ())
7647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7652 if (dump_enabled_p ())
7654 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7655 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7656 dump_printf (MSG_NOTE
, "\n");
7659 /* Empty stmt is expected only in case of a function argument.
7660 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7661 if (gimple_nop_p (*def_stmt
))
7664 *dt
= vect_external_def
;
7668 bb
= gimple_bb (*def_stmt
);
7670 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7671 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7672 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7673 *dt
= vect_external_def
;
7676 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7677 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7680 if (*dt
== vect_unknown_def_type
7682 && *dt
== vect_double_reduction_def
7683 && gimple_code (stmt
) != GIMPLE_PHI
))
7685 if (dump_enabled_p ())
7686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7687 "Unsupported pattern.\n");
7691 if (dump_enabled_p ())
7692 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
7694 switch (gimple_code (*def_stmt
))
7697 *def
= gimple_phi_result (*def_stmt
);
7701 *def
= gimple_assign_lhs (*def_stmt
);
7705 *def
= gimple_call_lhs (*def_stmt
);
7710 if (dump_enabled_p ())
7711 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7712 "unsupported defining stmt:\n");
7719 /* Function vect_is_simple_use_1.
   Same as vect_is_simple_use but also determines the vector operand
7722 type of OPERAND and stores it to *VECTYPE. If the definition of
7723 OPERAND is vect_uninitialized_def, vect_constant_def or
7724 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */
7729 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7730 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7731 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
7733 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
7737 /* Now get a vector type if the def is internal, otherwise supply
7738 NULL_TREE and leave it up to the caller to figure out a proper
7739 type for the use stmt. */
7740 if (*dt
== vect_internal_def
7741 || *dt
== vect_induction_def
7742 || *dt
== vect_reduction_def
7743 || *dt
== vect_double_reduction_def
7744 || *dt
== vect_nested_cycle
)
7746 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
7748 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7749 && !STMT_VINFO_RELEVANT (stmt_info
)
7750 && !STMT_VINFO_LIVE_P (stmt_info
))
7751 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7753 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7754 gcc_assert (*vectype
!= NULL_TREE
);
7756 else if (*dt
== vect_uninitialized_def
7757 || *dt
== vect_constant_def
7758 || *dt
== vect_external_def
)
7759 *vectype
= NULL_TREE
;
7767 /* Function supportable_widening_operation
7769 Check whether an operation represented by the code CODE is a
7770 widening operation that is supported by the target platform in
7771 vector form (i.e., when operating on arguments of type VECTYPE_IN
7772 producing a result of type VECTYPE_OUT).
   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
7781 vectorizing the operation, if available.
7782 - MULTI_STEP_CVT determines the number of required intermediate steps in
7783 case of multi-step conversion (like char->short->int - in that case
7784 MULTI_STEP_CVT will be 1).
7785 - INTERM_TYPES contains the intermediate type required to perform the
7786 widening operation (short in the above example). */
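/* Worked example (illustration only, assuming 16-byte vectors): widening
   chars in a V16QI to ints in V4SI vectors cannot be done in one step, so
   for VECTYPE_IN = V16QI and VECTYPE_OUT = V4SI this function would return
   the lo/hi (or even/odd) codes for the first step, set *MULTI_STEP_CVT
   to 1, and push the intermediate short vector type (V8HI) onto
   *INTERM_TYPES.  */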
7789 supportable_widening_operation (enum tree_code code
, gimple stmt
,
7790 tree vectype_out
, tree vectype_in
,
7791 enum tree_code
*code1
, enum tree_code
*code2
,
7792 int *multi_step_cvt
,
7793 vec
<tree
> *interm_types
)
7795 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7796 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7797 struct loop
*vect_loop
= NULL
;
7798 enum machine_mode vec_mode
;
7799 enum insn_code icode1
, icode2
;
7800 optab optab1
, optab2
;
7801 tree vectype
= vectype_in
;
7802 tree wide_vectype
= vectype_out
;
7803 enum tree_code c1
, c2
;
7805 tree prev_type
, intermediate_type
;
7806 enum machine_mode intermediate_mode
, prev_mode
;
7807 optab optab3
, optab4
;
7809 *multi_step_cvt
= 0;
7811 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
7815 case WIDEN_MULT_EXPR
:
7816 /* The result of a vectorized widening operation usually requires
7817 two vectors (because the widened results do not fit into one vector).
7818 The generated vector results would normally be expected to be
7819 generated in the same order as in the original scalar computation,
7820 i.e. if 8 results are generated in each vector iteration, they are
7821 to be organized as follows:
7822 vect1: [res1,res2,res3,res4],
7823 vect2: [res5,res6,res7,res8].
7825 However, in the special case that the result of the widening
7826 operation is used in a reduction computation only, the order doesn't
7827 matter (because when vectorizing a reduction we change the order of
7828 the computation). Some targets can take advantage of this and
7829 generate more efficient code. For example, targets like Altivec,
7830 that support widen_mult using a sequence of {mult_even,mult_odd}
7831 generate the following vectors:
7832 vect1: [res1,res3,res5,res7],
7833 vect2: [res2,res4,res6,res8].
7835 When vectorizing outer-loops, we execute the inner-loop sequentially
7836 (each vectorized inner-loop iteration contributes to VF outer-loop
        iterations in parallel).  We therefore don't allow changing the
        order of the computation in the inner-loop during outer-loop
        vectorization.  */
7840 /* TODO: Another case in which order doesn't *really* matter is when we
7841 widen and then contract again, e.g. (short)((int)x * y >> 8).
7842 Normally, pack_trunc performs an even/odd permute, whereas the
7843 repack from an even/odd expansion would be an interleave, which
7844 would be significantly simpler for e.g. AVX2. */
7845 /* In any case, in order to avoid duplicating the code below, recurse
7846 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7847 are properly set up for the caller. If we fail, we'll continue with
7848 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7850 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
7851 && !nested_in_vect_loop_p (vect_loop
, stmt
)
7852 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
7853 stmt
, vectype_out
, vectype_in
,
7854 code1
, code2
, multi_step_cvt
,
7857 /* Elements in a vector with vect_used_by_reduction property cannot
7858 be reordered if the use chain with this property does not have the
	 same operation.  One such example is s += a * b, where elements
7860 in a and b cannot be reordered. Here we check if the vector defined
7861 by STMT is only directly used in the reduction statement. */
7862 tree lhs
= gimple_assign_lhs (stmt
);
7863 use_operand_p dummy
;
7865 stmt_vec_info use_stmt_info
= NULL
;
7866 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
7867 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
7868 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
7871 c1
= VEC_WIDEN_MULT_LO_EXPR
;
7872 c2
= VEC_WIDEN_MULT_HI_EXPR
;
7875 case VEC_WIDEN_MULT_EVEN_EXPR
:
7876 /* Support the recursion induced just above. */
7877 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
7878 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
7881 case WIDEN_LSHIFT_EXPR
:
7882 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
7883 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
7887 c1
= VEC_UNPACK_LO_EXPR
;
7888 c2
= VEC_UNPACK_HI_EXPR
;
7892 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
7893 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
7896 case FIX_TRUNC_EXPR
:
7897 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7898 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7899 computing the operation. */
7906 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
7908 enum tree_code ctmp
= c1
;
7913 if (code
== FIX_TRUNC_EXPR
)
7915 /* The signedness is determined from output operand. */
7916 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7917 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
7921 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7922 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
7925 if (!optab1
|| !optab2
)
7928 vec_mode
= TYPE_MODE (vectype
);
7929 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
7930 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
7936 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7937 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
7943 prev_type
= vectype
;
7944 prev_mode
= vec_mode
;
7946 if (!CONVERT_EXPR_CODE_P (code
))
7949 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
7953 interm_types
->create (MAX_INTERM_CVT_STEPS
);
7954 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
7956 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
7958 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
7959 TYPE_UNSIGNED (prev_type
));
7960 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
7961 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
7963 if (!optab3
|| !optab4
7964 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
7965 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
7966 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
7967 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
7968 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
7969 == CODE_FOR_nothing
)
7970 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
7971 == CODE_FOR_nothing
))
7974 interm_types
->quick_push (intermediate_type
);
7975 (*multi_step_cvt
)++;
7977 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7978 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7981 prev_type
= intermediate_type
;
7982 prev_mode
= intermediate_mode
;
7985 interm_types
->release ();
7990 /* Function supportable_narrowing_operation
7992 Check whether an operation represented by the code CODE is a
7993 narrowing operation that is supported by the target platform in
7994 vector form (i.e., when operating on arguments of type VECTYPE_IN
7995 and producing a result of type VECTYPE_OUT).
7997 Narrowing operations we currently support are NOP (CONVERT) and
7998 FIX_TRUNC. This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
8003 vectorizing the operation, if available.
8004 - MULTI_STEP_CVT determines the number of required intermediate steps in
8005 case of multi-step conversion (like int->short->char - in that case
8006 MULTI_STEP_CVT will be 1).
8007 - INTERM_TYPES contains the intermediate type required to perform the
8008 narrowing operation (short in the above example). */
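/* Worked example (illustration only, assuming 16-byte vectors): narrowing
   ints to chars goes int -> short -> char, so for VECTYPE_IN = V4SI and
   VECTYPE_OUT = V16QI this function would return VEC_PACK_TRUNC_EXPR in
   *CODE1, set *MULTI_STEP_CVT to 1, and push the intermediate short vector
   type (V8HI) onto *INTERM_TYPES.  */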
8011 supportable_narrowing_operation (enum tree_code code
,
8012 tree vectype_out
, tree vectype_in
,
8013 enum tree_code
*code1
, int *multi_step_cvt
,
8014 vec
<tree
> *interm_types
)
8016 enum machine_mode vec_mode
;
8017 enum insn_code icode1
;
8018 optab optab1
, interm_optab
;
8019 tree vectype
= vectype_in
;
8020 tree narrow_vectype
= vectype_out
;
8022 tree intermediate_type
;
8023 enum machine_mode intermediate_mode
, prev_mode
;
8027 *multi_step_cvt
= 0;
8031 c1
= VEC_PACK_TRUNC_EXPR
;
8034 case FIX_TRUNC_EXPR
:
8035 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8039 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8040 tree code and optabs used for computing the operation. */
8047 if (code
== FIX_TRUNC_EXPR
)
8048 /* The signedness is determined from output operand. */
8049 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8051 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8056 vec_mode
= TYPE_MODE (vectype
);
8057 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8062 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
8067 prev_mode
= vec_mode
;
8068 if (code
== FIX_TRUNC_EXPR
)
8069 uns
= TYPE_UNSIGNED (vectype_out
);
8071 uns
= TYPE_UNSIGNED (vectype
);
8073 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8074 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8075 costly than signed. */
8076 if (code
== FIX_TRUNC_EXPR
&& uns
)
8078 enum insn_code icode2
;
8081 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8083 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8084 if (interm_optab
!= unknown_optab
8085 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8086 && insn_data
[icode1
].operand
[0].mode
8087 == insn_data
[icode2
].operand
[0].mode
)
8090 optab1
= interm_optab
;
8095 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8096 intermediate steps in promotion sequence. We try
8097 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8098 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8099 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8101 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8103 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8105 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8108 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8109 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8110 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8111 == CODE_FOR_nothing
))
8114 interm_types
->quick_push (intermediate_type
);
8115 (*multi_step_cvt
)++;
8117 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8120 prev_mode
= intermediate_mode
;
8121 optab1
= interm_optab
;
8124 interm_types
->release ();