/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
28 #include "stor-layout.h"
35 #include "hard-reg-set.h"
38 #include "dominance.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
45 #include "gimple-expr.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
62 #include "recog.h" /* FIXME: for insn_data */
64 #include "diagnostic-core.h"
65 #include "tree-vectorizer.h"
70 /* For lang_hooks.types.type_for_mode. */
71 #include "langhooks.h"
73 /* Return the vectorized type for the given statement. */
76 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
78 return STMT_VINFO_VECTYPE (stmt_info
);
81 /* Return TRUE iff the given statement is in an inner loop relative to
82 the loop being vectorized. */
84 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
86 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
87 basic_block bb
= gimple_bb (stmt
);
88 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
94 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
96 return (bb
->loop_father
== loop
->inner
);
99 /* Record the cost of a statement, either by directly informing the
100 target model or by saving it in a vector for later processing.
101 Return a preliminary estimate of the statement's cost. */
104 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
105 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
106 int misalign
, enum vect_cost_model_location where
)
110 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
111 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
112 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
115 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
120 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
121 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
122 void *target_cost_data
;
125 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
127 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
129 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
134 /* Return a variable of type ELEM_TYPE[NELEMS]. */
137 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
139 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
143 /* ARRAY is an array of vectors created by create_vector_array.
144 Return an SSA_NAME for the vector in index N. The reference
145 is part of the vectorization of STMT and the vector is associated
146 with scalar destination SCALAR_DEST. */
149 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
150 tree array
, unsigned HOST_WIDE_INT n
)
152 tree vect_type
, vect
, vect_name
, array_ref
;
155 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
156 vect_type
= TREE_TYPE (TREE_TYPE (array
));
157 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
158 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
159 build_int_cst (size_type_node
, n
),
160 NULL_TREE
, NULL_TREE
);
162 new_stmt
= gimple_build_assign (vect
, array_ref
);
163 vect_name
= make_ssa_name (vect
, new_stmt
);
164 gimple_assign_set_lhs (new_stmt
, vect_name
);
165 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
170 /* ARRAY is an array of vectors created by create_vector_array.
171 Emit code to store SSA_NAME VECT in index N of the array.
172 The store is part of the vectorization of STMT. */
175 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
176 tree array
, unsigned HOST_WIDE_INT n
)
181 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
182 build_int_cst (size_type_node
, n
),
183 NULL_TREE
, NULL_TREE
);
185 new_stmt
= gimple_build_assign (array_ref
, vect
);
186 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
189 /* PTR is a pointer to an array of type TYPE. Return a representation
190 of *PTR. The memory reference replaces those in FIRST_DR
194 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
196 tree mem_ref
, alias_ptr_type
;
198 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
199 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
200 /* Arrays have the same alignment as their type. */
201 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
205 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
207 /* Function vect_mark_relevant.
209 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
212 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
213 enum vect_relevant relevant
, bool live_p
,
214 bool used_in_pattern
)
216 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
217 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
218 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
221 if (dump_enabled_p ())
222 dump_printf_loc (MSG_NOTE
, vect_location
,
223 "mark relevant %d, live %d.\n", relevant
, live_p
);
225 /* If this stmt is an original stmt in a pattern, we might need to mark its
226 related pattern stmt instead of the original stmt. However, such stmts
227 may have their own uses that are not in any pattern, in such cases the
228 stmt itself should be marked. */
229 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
232 if (!used_in_pattern
)
234 imm_use_iterator imm_iter
;
238 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
239 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
241 if (is_gimple_assign (stmt
))
242 lhs
= gimple_assign_lhs (stmt
);
244 lhs
= gimple_call_lhs (stmt
);
246 /* This use is out of pattern use, if LHS has other uses that are
247 pattern uses, we should mark the stmt itself, and not the pattern
249 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
250 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
252 if (is_gimple_debug (USE_STMT (use_p
)))
254 use_stmt
= USE_STMT (use_p
);
256 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
259 if (vinfo_for_stmt (use_stmt
)
260 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
270 /* This is the last stmt in a sequence that was detected as a
271 pattern that can potentially be vectorized. Don't mark the stmt
272 as relevant/live because it's not going to be vectorized.
273 Instead mark the pattern-stmt that replaces it. */
275 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
277 if (dump_enabled_p ())
278 dump_printf_loc (MSG_NOTE
, vect_location
,
279 "last stmt in pattern. don't mark"
280 " relevant/live.\n");
281 stmt_info
= vinfo_for_stmt (pattern_stmt
);
282 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
283 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
284 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
289 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
290 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
291 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
293 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
294 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
296 if (dump_enabled_p ())
297 dump_printf_loc (MSG_NOTE
, vect_location
,
298 "already marked relevant/live.\n");
302 worklist
->safe_push (stmt
);
306 /* Function vect_stmt_relevant_p.
308 Return true if STMT in loop that is represented by LOOP_VINFO is
309 "relevant for vectorization".
311 A stmt is considered "relevant for vectorization" if:
312 - it has uses outside the loop.
313 - it has vdefs (it alters memory).
314 - control stmts in the loop (except for the exit condition).
316 CHECKME: what other side effects would the vectorizer allow? */
319 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
320 enum vect_relevant
*relevant
, bool *live_p
)
322 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
324 imm_use_iterator imm_iter
;
328 *relevant
= vect_unused_in_scope
;
331 /* cond stmt other than loop exit cond. */
332 if (is_ctrl_stmt (stmt
)
333 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
334 != loop_exit_ctrl_vec_info_type
)
335 *relevant
= vect_used_in_scope
;
337 /* changing memory. */
338 if (gimple_code (stmt
) != GIMPLE_PHI
)
339 if (gimple_vdef (stmt
))
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE
, vect_location
,
343 "vec_stmt_relevant_p: stmt has vdefs.\n");
344 *relevant
= vect_used_in_scope
;
347 /* uses outside the loop. */
348 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
350 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
352 basic_block bb
= gimple_bb (USE_STMT (use_p
));
353 if (!flow_bb_inside_loop_p (loop
, bb
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: used out of loop.\n");
359 if (is_gimple_debug (USE_STMT (use_p
)))
362 /* We expect all such uses to be in the loop exit phis
363 (because of loop closed form) */
364 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
365 gcc_assert (bb
== single_exit (loop
)->dest
);
372 return (*live_p
|| *relevant
);
376 /* Function exist_non_indexing_operands_for_use_p
378 USE is one of the uses attached to STMT. Check if USE is
379 used in STMT for anything other than indexing an array. */
382 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
385 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
387 /* USE corresponds to some operand in STMT. If there is no data
388 reference in STMT, then any operand that corresponds to USE
389 is not indexing an array. */
390 if (!STMT_VINFO_DATA_REF (stmt_info
))
393 /* STMT has a data_ref. FORNOW this means that its of one of
397 (This should have been verified in analyze_data_refs).
399 'var' in the second case corresponds to a def, not a use,
400 so USE cannot correspond to any operands that are not used
403 Therefore, all we need to check is if STMT falls into the
404 first case, and whether var corresponds to USE. */
406 if (!gimple_assign_copy_p (stmt
))
408 if (is_gimple_call (stmt
)
409 && gimple_call_internal_p (stmt
))
410 switch (gimple_call_internal_fn (stmt
))
413 operand
= gimple_call_arg (stmt
, 3);
418 operand
= gimple_call_arg (stmt
, 2);
428 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
430 operand
= gimple_assign_rhs1 (stmt
);
431 if (TREE_CODE (operand
) != SSA_NAME
)
442 Function process_use.
445 - a USE in STMT in a loop represented by LOOP_VINFO
446 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
447 that defined USE. This is done by calling mark_relevant and passing it
448 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
449 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
453 Generally, LIVE_P and RELEVANT are used to define the liveness and
454 relevance info of the DEF_STMT of this USE:
455 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
456 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
458 - case 1: If USE is used only for address computations (e.g. array indexing),
459 which does not need to be directly vectorized, then the liveness/relevance
460 of the respective DEF_STMT is left unchanged.
461 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
462 skip DEF_STMT cause it had already been processed.
463 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
464 be modified accordingly.
466 Return true if everything is as expected. Return false otherwise. */
469 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
470 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
473 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
474 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
475 stmt_vec_info dstmt_vinfo
;
476 basic_block bb
, def_bb
;
479 enum vect_def_type dt
;
481 /* case 1: we are only interested in uses that need to be vectorized. Uses
482 that are used for address computation are not considered relevant. */
483 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
486 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
490 "not vectorized: unsupported use in stmt.\n");
494 if (!def_stmt
|| gimple_nop_p (def_stmt
))
497 def_bb
= gimple_bb (def_stmt
);
498 if (!flow_bb_inside_loop_p (loop
, def_bb
))
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
505 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
506 DEF_STMT must have already been processed, because this should be the
507 only way that STMT, which is a reduction-phi, was put in the worklist,
508 as there should be no other uses for DEF_STMT in the loop. So we just
509 check that everything is as expected, and we are done. */
510 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
511 bb
= gimple_bb (stmt
);
512 if (gimple_code (stmt
) == GIMPLE_PHI
513 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
514 && gimple_code (def_stmt
) != GIMPLE_PHI
515 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
516 && bb
->loop_father
== def_bb
->loop_father
)
518 if (dump_enabled_p ())
519 dump_printf_loc (MSG_NOTE
, vect_location
,
520 "reduc-stmt defining reduc-phi in the same nest.\n");
521 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
522 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
523 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
524 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
525 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
536 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE
, vect_location
,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
544 case vect_unused_in_scope
:
545 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
546 vect_used_in_scope
: vect_unused_in_scope
;
549 case vect_used_in_outer_by_reduction
:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
551 relevant
= vect_used_by_reduction
;
554 case vect_used_in_outer
:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
556 relevant
= vect_used_in_scope
;
559 case vect_used_in_scope
:
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
574 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE
, vect_location
,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
582 case vect_unused_in_scope
:
583 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
585 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
588 case vect_used_by_reduction
:
589 relevant
= vect_used_in_outer_by_reduction
;
592 case vect_used_in_scope
:
593 relevant
= vect_used_in_outer
;
601 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
602 is_pattern_stmt_p (stmt_vinfo
));
607 /* Function vect_mark_stmts_to_be_vectorized.
609 Not all stmts in the loop need to be vectorized. For example:
618 Stmt 1 and 3 do not need to be vectorized, because loop control and
619 addressing of vectorized data-refs are handled differently.
621 This pass detects such stmts. */
624 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
626 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
627 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
628 unsigned int nbbs
= loop
->num_nodes
;
629 gimple_stmt_iterator si
;
632 stmt_vec_info stmt_vinfo
;
636 enum vect_relevant relevant
, tmp_relevant
;
637 enum vect_def_type def_type
;
639 if (dump_enabled_p ())
640 dump_printf_loc (MSG_NOTE
, vect_location
,
641 "=== vect_mark_stmts_to_be_vectorized ===\n");
643 auto_vec
<gimple
, 64> worklist
;
645 /* 1. Init worklist. */
646 for (i
= 0; i
< nbbs
; i
++)
649 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
652 if (dump_enabled_p ())
654 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
655 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
656 dump_printf (MSG_NOTE
, "\n");
659 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
660 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
662 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
664 stmt
= gsi_stmt (si
);
665 if (dump_enabled_p ())
667 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
668 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
669 dump_printf (MSG_NOTE
, "\n");
672 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
673 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
677 /* 2. Process_worklist */
678 while (worklist
.length () > 0)
683 stmt
= worklist
.pop ();
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
688 dump_printf (MSG_NOTE
, "\n");
691 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
692 (DEF_STMT) as relevant/irrelevant and live/dead according to the
693 liveness and relevance properties of STMT. */
694 stmt_vinfo
= vinfo_for_stmt (stmt
);
695 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
696 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
698 /* Generally, the liveness and relevance properties of STMT are
699 propagated as is to the DEF_STMTs of its USEs:
700 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
701 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the liveness/relevance as follows:
706 relevant = vect_used_by_reduction
707 This is because we distinguish between two kinds of relevant stmts -
708 those that are used by a reduction computation, and those that are
709 (also) used by a regular computation. This allows us later on to
710 identify stmts that are used solely by a reduction, and therefore the
711 order of the results that they produce does not have to be kept. */
713 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
714 tmp_relevant
= relevant
;
717 case vect_reduction_def
:
718 switch (tmp_relevant
)
720 case vect_unused_in_scope
:
721 relevant
= vect_used_by_reduction
;
724 case vect_used_by_reduction
:
725 if (gimple_code (stmt
) == GIMPLE_PHI
)
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
732 "unsupported use of reduction.\n");
739 case vect_nested_cycle
:
740 if (tmp_relevant
!= vect_unused_in_scope
741 && tmp_relevant
!= vect_used_in_outer_by_reduction
742 && tmp_relevant
!= vect_used_in_outer
)
744 if (dump_enabled_p ())
745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
746 "unsupported use of nested cycle.\n");
754 case vect_double_reduction_def
:
755 if (tmp_relevant
!= vect_unused_in_scope
756 && tmp_relevant
!= vect_used_by_reduction
)
758 if (dump_enabled_p ())
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
760 "unsupported use of double reduction.\n");
772 if (is_pattern_stmt_p (stmt_vinfo
))
774 /* Pattern statements are not inserted into the code, so
775 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
776 have to scan the RHS or function arguments instead. */
777 if (is_gimple_assign (stmt
))
779 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
780 tree op
= gimple_assign_rhs1 (stmt
);
783 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
785 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
786 live_p
, relevant
, &worklist
, false)
787 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
788 live_p
, relevant
, &worklist
, false))
792 for (; i
< gimple_num_ops (stmt
); i
++)
794 op
= gimple_op (stmt
, i
);
795 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
800 else if (is_gimple_call (stmt
))
802 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
804 tree arg
= gimple_call_arg (stmt
, i
);
805 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
812 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
814 tree op
= USE_FROM_PTR (use_p
);
815 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
820 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
823 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
825 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
829 } /* while worklist */
835 /* Function vect_model_simple_cost.
837 Models cost for simple operations, i.e. those that only emit ncopies of a
838 single op. Right now, this does not account for multiple insns that could
839 be generated for the single vector op. We will handle that shortly. */
842 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
843 enum vect_def_type
*dt
,
844 stmt_vector_for_cost
*prologue_cost_vec
,
845 stmt_vector_for_cost
*body_cost_vec
)
848 int inside_cost
= 0, prologue_cost
= 0;
850 /* The SLP costs were already calculated during SLP tree build. */
851 if (PURE_SLP_STMT (stmt_info
))
854 /* FORNOW: Assuming maximum 2 args per stmts. */
855 for (i
= 0; i
< 2; i
++)
856 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
857 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
858 stmt_info
, 0, vect_prologue
);
860 /* Pass the inside-of-loop statements to the target-specific cost model. */
861 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
862 stmt_info
, 0, vect_body
);
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_NOTE
, vect_location
,
866 "vect_model_simple_cost: inside_cost = %d, "
867 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
871 /* Model cost for type demotion and promotion operations. PWR is normally
872 zero for single-step promotions and demotions. It will be one if
873 two-step promotion/demotion is required, and so on. Each additional
874 step doubles the number of instructions required. */
877 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
878 enum vect_def_type
*dt
, int pwr
)
881 int inside_cost
= 0, prologue_cost
= 0;
882 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
883 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
884 void *target_cost_data
;
886 /* The SLP costs were already calculated during SLP tree build. */
887 if (PURE_SLP_STMT (stmt_info
))
891 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
893 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
895 for (i
= 0; i
< pwr
+ 1; i
++)
897 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
899 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
900 vec_promote_demote
, stmt_info
, 0,
904 /* FORNOW: Assuming maximum 2 args per stmts. */
905 for (i
= 0; i
< 2; i
++)
906 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
907 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
908 stmt_info
, 0, vect_prologue
);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE
, vect_location
,
912 "vect_model_promotion_demotion_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
916 /* Function vect_cost_group_size
918 For grouped load or store, return the group_size only if it is the first
919 load or store of a group, else return 1. This ensures that group size is
920 only returned once per group. */
923 vect_cost_group_size (stmt_vec_info stmt_info
)
925 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
927 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
928 return GROUP_SIZE (stmt_info
);
934 /* Function vect_model_store_cost
936 Models cost for stores. In the case of grouped accesses, one access
937 has the overhead of the grouped access attributed to it. */
940 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
941 bool store_lanes_p
, enum vect_def_type dt
,
943 stmt_vector_for_cost
*prologue_cost_vec
,
944 stmt_vector_for_cost
*body_cost_vec
)
947 unsigned int inside_cost
= 0, prologue_cost
= 0;
948 struct data_reference
*first_dr
;
951 /* The SLP costs were already calculated during SLP tree build. */
952 if (PURE_SLP_STMT (stmt_info
))
955 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
956 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
957 stmt_info
, 0, vect_prologue
);
959 /* Grouped access? */
960 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
964 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
969 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
970 group_size
= vect_cost_group_size (stmt_info
);
973 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
975 /* Not a grouped access. */
979 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
982 /* We assume that the cost of a single store-lanes instruction is
983 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
984 access is instead being provided by a permute-and-store operation,
985 include the cost of the permutes. */
986 if (!store_lanes_p
&& group_size
> 1)
988 /* Uses a high and low interleave or shuffle operations for each
990 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
991 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
992 stmt_info
, 0, vect_body
);
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_NOTE
, vect_location
,
996 "vect_model_store_cost: strided group_size = %d .\n",
1000 /* Costs of the stores. */
1001 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_NOTE
, vect_location
,
1005 "vect_model_store_cost: inside_cost = %d, "
1006 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1010 /* Calculate cost of DR's memory access. */
1012 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1013 unsigned int *inside_cost
,
1014 stmt_vector_for_cost
*body_cost_vec
)
1016 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1017 gimple stmt
= DR_STMT (dr
);
1018 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1020 switch (alignment_support_scheme
)
1024 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1025 vector_store
, stmt_info
, 0,
1028 if (dump_enabled_p ())
1029 dump_printf_loc (MSG_NOTE
, vect_location
,
1030 "vect_model_store_cost: aligned.\n");
1034 case dr_unaligned_supported
:
1036 /* Here, we assign an additional cost for the unaligned store. */
1037 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1038 unaligned_store
, stmt_info
,
1039 DR_MISALIGNMENT (dr
), vect_body
);
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE
, vect_location
,
1042 "vect_model_store_cost: unaligned supported by "
1047 case dr_unaligned_unsupported
:
1049 *inside_cost
= VECT_MAX_COST
;
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1053 "vect_model_store_cost: unsupported access.\n");
1063 /* Function vect_model_load_cost
1065 Models cost for loads. In the case of grouped accesses, the last access
1066 has the overhead of the grouped access attributed to it. Since unaligned
1067 accesses are supported for loads, we also account for the costs of the
1068 access scheme chosen. */
1071 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1072 bool load_lanes_p
, slp_tree slp_node
,
1073 stmt_vector_for_cost
*prologue_cost_vec
,
1074 stmt_vector_for_cost
*body_cost_vec
)
1078 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1079 unsigned int inside_cost
= 0, prologue_cost
= 0;
1081 /* The SLP costs were already calculated during SLP tree build. */
1082 if (PURE_SLP_STMT (stmt_info
))
1085 /* Grouped accesses? */
1086 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1087 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1089 group_size
= vect_cost_group_size (stmt_info
);
1090 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1092 /* Not a grouped access. */
1099 /* We assume that the cost of a single load-lanes instruction is
1100 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1101 access is instead being provided by a load-and-permute operation,
1102 include the cost of the permutes. */
1103 if (!load_lanes_p
&& group_size
> 1)
1105 /* Uses an even and odd extract operations or shuffle operations
1106 for each needed permute. */
1107 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1108 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1109 stmt_info
, 0, vect_body
);
1111 if (dump_enabled_p ())
1112 dump_printf_loc (MSG_NOTE
, vect_location
,
1113 "vect_model_load_cost: strided group_size = %d .\n",
1117 /* The loads themselves. */
1118 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1120 /* N scalar loads plus gathering them into a vector. */
1121 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1122 inside_cost
+= record_stmt_cost (body_cost_vec
,
1123 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1124 scalar_load
, stmt_info
, 0, vect_body
);
1125 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1126 stmt_info
, 0, vect_body
);
1129 vect_get_load_cost (first_dr
, ncopies
,
1130 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1131 || group_size
> 1 || slp_node
),
1132 &inside_cost
, &prologue_cost
,
1133 prologue_cost_vec
, body_cost_vec
, true);
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_NOTE
, vect_location
,
1137 "vect_model_load_cost: inside_cost = %d, "
1138 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1142 /* Calculate cost of DR's memory access. */
/* NOTE(review): this chunk is a mangled extraction -- logical lines are
   split mid-token, the original file's line numbers are fused into the
   text, and brace-only / break / fallthrough lines were dropped.  The
   comments added below annotate intent only; the code text is unchanged.  */
/* Model the inside-loop and prologue cost of the vector load described by
   data reference DR, replicated NCOPIES times.  Dispatches on the
   alignment support scheme computed by vect_supportable_dr_alignment and
   accumulates into *INSIDE_COST / *PROLOGUE_COST, recording individual
   stmt costs into BODY_COST_VEC / PROLOGUE_COST_VEC.  Prologue costs are
   only recorded when RECORD_PROLOGUE_COSTS; ADD_REALIGN_COST requests the
   one-time realignment setup cost (see dr_explicit_realign_optimized).  */
1144 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1145 bool add_realign_cost
, unsigned int *inside_cost
,
1146 unsigned int *prologue_cost
,
1147 stmt_vector_for_cost
*prologue_cost_vec
,
1148 stmt_vector_for_cost
*body_cost_vec
,
1149 bool record_prologue_costs
)
1151 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1152 gimple stmt
= DR_STMT (dr
);
1153 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Each case below charges the load differently depending on how the
   target supports the access's (mis)alignment.  The `case dr_aligned:'
   label for the first arm was dropped by the extraction -- TODO confirm
   against the original source.  */
1155 switch (alignment_support_scheme
)
/* Aligned access: one plain vector_load per copy.  */
1159 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1160 stmt_info
, 0, vect_body
);
1162 if (dump_enabled_p ())
1163 dump_printf_loc (MSG_NOTE
, vect_location
,
1164 "vect_model_load_cost: aligned.\n");
1168 case dr_unaligned_supported
:
1170 /* Here, we assign an additional cost for the unaligned load. */
/* The misalignment amount is passed so the target cost hook can refine
   the estimate.  */
1171 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1172 unaligned_load
, stmt_info
,
1173 DR_MISALIGNMENT (dr
), vect_body
);
1175 if (dump_enabled_p ())
1176 dump_printf_loc (MSG_NOTE
, vect_location
,
1177 "vect_model_load_cost: unaligned supported by "
1182 case dr_explicit_realign
:
/* Explicit realignment: two loads plus a permute per copy.  */
1184 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1185 vector_load
, stmt_info
, 0, vect_body
);
1186 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1187 vec_perm
, stmt_info
, 0, vect_body
);
1189 /* FIXME: If the misalignment remains fixed across the iterations of
1190 the containing loop, the following cost should be added to the
1192 if (targetm
.vectorize
.builtin_mask_for_load
)
1193 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1194 stmt_info
, 0, vect_body
);
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE
, vect_location
,
1198 "vect_model_load_cost: explicit realign\n");
1202 case dr_explicit_realign_optimized
:
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE
, vect_location
,
1206 "vect_model_load_cost: unaligned software "
1209 /* Unaligned software pipeline has a load of an address, an initial
1210 load, and possibly a mask operation to "prime" the loop. However,
1211 if this is an access in a group of loads, which provide grouped
1212 access, then the above cost should only be considered for one
1213 access in the group. Inside the loop, there is a load op
1214 and a realignment op. */
/* One-time prologue cost: charged only once per group (ADD_REALIGN_COST)
   and only when the caller wants prologue costs at all.  */
1216 if (add_realign_cost
&& record_prologue_costs
)
1218 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1219 vector_stmt
, stmt_info
,
1221 if (targetm
.vectorize
.builtin_mask_for_load
)
1222 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1223 vector_stmt
, stmt_info
,
/* Steady-state body cost: a load plus a realigning permute per copy.  */
1227 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1228 stmt_info
, 0, vect_body
);
1229 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1230 stmt_info
, 0, vect_body
);
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE
, vect_location
,
1234 "vect_model_load_cost: explicit realign optimized"
1240 case dr_unaligned_unsupported
:
/* Target cannot do this access at all: make the cost prohibitive so the
   vectorizer rejects the loop on cost grounds.  */
1242 *inside_cost
= VECT_MAX_COST
;
1244 if (dump_enabled_p ())
1245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1246 "vect_model_load_cost: unsupported access.\n");
1255 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1256 the loop preheader for the vectorized stmt STMT. */
/* NOTE(review): mangled extraction -- lines are split mid-token and the
   surrounding braces/else were dropped; comments only, code unchanged.  */
1259 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
/* If the caller supplied an iterator, insert there (the guard `if (gsi)'
   was dropped by the extraction -- TODO confirm).  */
1262 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Otherwise hoist the init stmt out of the loop (loop vectorization) or
   to the start of the basic block (basic-block SLP vectorization).  */
1265 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1266 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1270 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* For a stmt in an inner loop, presumably the outer loop's preheader is
   selected here -- the assignment dropped between these lines makes that
   a TODO to confirm.  */
1274 if (nested_in_vect_loop_p (loop
, stmt
))
/* Insert on the preheader edge; gsi_insert_on_edge_immediate returns a
   new block only if the edge had to be split, which we assert cannot
   happen for a preheader edge.  */
1277 pe
= loop_preheader_edge (loop
);
1278 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1279 gcc_assert (!new_bb
);
/* Basic-block vectorization: insert after the labels of the region's
   block.  */
1283 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1285 gimple_stmt_iterator gsi_bb_start
;
1287 gcc_assert (bb_vinfo
);
1288 bb
= BB_VINFO_BB (bb_vinfo
);
1289 gsi_bb_start
= gsi_after_labels (bb
);
1290 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
/* Trace the freshly inserted init stmt when dumping is enabled.  */
1294 if (dump_enabled_p ())
1296 dump_printf_loc (MSG_NOTE
, vect_location
,
1297 "created new init_stmt: ");
1298 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1299 dump_printf (MSG_NOTE
, "\n");
1303 /* Function vect_init_vector.
1305 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1306 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1307 vector type a vector with all elements equal to VAL is created first.
1308 Place the initialization at BSI if it is not NULL. Otherwise, place the
1309 initialization at the loop preheader.
1310 Return the DEF of INIT_STMT.
1311 It will be used in the vectorization of STMT. */
/* NOTE(review): mangled extraction; comments only, code text unchanged.  */
1314 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
/* Scalar VAL being splat into a vector TYPE: first coerce VAL to the
   vector's element type if needed.  */
1321 if (TREE_CODE (type
) == VECTOR_TYPE
1322 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1324 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
/* Constants can be converted by folding a VIEW_CONVERT_EXPR directly ...  */
1326 if (CONSTANT_CLASS_P (val
))
1327 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
/* ... otherwise emit an explicit NOP_EXPR conversion stmt and insert it
   via vect_init_vector_1 (the intervening lines that complete the
   gimple_build_assign_with_ops call were dropped by the extraction).  */
1330 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1331 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1334 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Build the {val, val, ..., val} vector constructor.  */
1338 val
= build_vector_from_val (type
, val
);
/* Materialize the (possibly vector) VAL into a fresh "cst_" variable and
   return its SSA def.  */
1341 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1342 init_stmt
= gimple_build_assign (new_var
, val
);
1343 new_temp
= make_ssa_name (new_var
, init_stmt
);
1344 gimple_assign_set_lhs (init_stmt
, new_temp
);
1345 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* The `return vec_oprnd;' that follows was dropped by the extraction --
   TODO confirm against the original source.  */
1346 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1351 /* Function vect_get_vec_def_for_operand.
1353 OP is an operand in STMT. This function returns a (vector) def that will be
1354 used in the vectorized stmt for STMT.
1356 In the case that OP is an SSA_NAME which is defined in the loop, then
1357 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1359 In case OP is an invariant or constant, a new stmt that creates a vector def
1360 needs to be introduced. */
/* NOTE(review): mangled extraction -- lines split mid-token, the switch
   head `switch (dt)' and several surrounding lines were dropped; comments
   only, code text unchanged.  */
1363 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1368 stmt_vec_info def_stmt_info
= NULL
;
1369 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1370 unsigned int nunits
;
1371 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1373 enum vect_def_type dt
;
/* Dump which operand we are producing a vector def for.  */
1377 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE
, vect_location
,
1380 "vect_get_vec_def_for_operand: ");
1381 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1382 dump_printf (MSG_NOTE
, "\n");
/* Classify OP (constant / external / internal / reduction / induction)
   and fetch its defining stmt; vect_is_simple_use must succeed here
   because OP was already validated during analysis.  */
1385 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1386 &def_stmt
, &def
, &dt
);
1387 gcc_assert (is_simple_use
);
1388 if (dump_enabled_p ())
1390 int loc_printed
= 0;
1393 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ")
;
1395 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1396 dump_printf (MSG_NOTE
, "\n");
1401 dump_printf (MSG_NOTE
, " def_stmt = ");
1403 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1404 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1405 dump_printf (MSG_NOTE
, "\n");
/* The `switch (dt)' line introducing the cases below was dropped by the
   extraction -- TODO confirm.  */
1411 /* Case 1: operand is a constant. */
1412 case vect_constant_def
:
1414 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1415 gcc_assert (vector_type
);
1416 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1421 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1422 if (dump_enabled_p ())
1423 dump_printf_loc (MSG_NOTE
, vect_location
,
1424 "Create vector_cst. nunits = %d\n", nunits
);
/* Splat the constant via vect_init_vector; NULL gsi places the init in
   the preheader.  */
1426 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1429 /* Case 2: operand is defined outside the loop - loop invariant. */
1430 case vect_external_def
:
1432 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1433 gcc_assert (vector_type
);
1438 /* Create 'vec_inv = {inv,inv,..,inv}' */
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1442 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1445 /* Case 3: operand is defined inside the loop. */
1446 case vect_internal_def
:
1449 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1451 /* Get the def from the vectorized stmt. */
1452 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1454 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1455 /* Get vectorized pattern statement. */
/* When the def stmt was replaced by a pattern stmt and is itself not
   relevant, the vector def lives on the related pattern stmt instead.
   (The leading condition of this `if' was dropped by the extraction.)  */
1457 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1458 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1459 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1460 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1461 gcc_assert (vec_stmt
);
/* Extract the LHS appropriately for PHIs, calls, and assignments.  */
1462 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1463 vec_oprnd
= PHI_RESULT (vec_stmt
);
1464 else if (is_gimple_call (vec_stmt
))
1465 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1467 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1471 /* Case 4: operand is defined by a loop header phi - reduction */
1472 case vect_reduction_def
:
1473 case vect_double_reduction_def
:
1474 case vect_nested_cycle
:
1478 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1479 loop
= (gimple_bb (def_stmt
))->loop_father
;
1481 /* Get the def before the loop */
1482 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1483 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1486 /* Case 5: operand is defined by loop-header phi - induction. */
1487 case vect_induction_def
:
1489 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1491 /* Get the def from the vectorized stmt. */
1492 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1493 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1494 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1495 vec_oprnd
= PHI_RESULT (vec_stmt
);
1497 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1507 /* Function vect_get_vec_def_for_stmt_copy
1509 Return a vector-def for an operand. This function is used when the
1510 vectorized stmt to be created (by the caller to this function) is a "copy"
1511 created in case the vectorized result cannot fit in one vector, and several
1512 copies of the vector-stmt are required. In this case the vector-def is
1513 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1514 of the stmt that defines VEC_OPRND.
1515 DT is the type of the vector def VEC_OPRND.
1518 In case the vectorization factor (VF) is bigger than the number
1519 of elements that can fit in a vectype (nunits), we have to generate
1520 more than one vector stmt to vectorize the scalar stmt. This situation
1521 arises when there are multiple data-types operated upon in the loop; the
1522 smallest data-type determines the VF, and as a result, when vectorizing
1523 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1524 vector stmt (each computing a vector of 'nunits' results, and together
1525 computing 'VF' results in each iteration). This function is called when
1526 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1527 which VF=16 and nunits=4, so the number of copies required is 4):
1529 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1531 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1532 VS1.1: vx.1 = memref1 VS1.2
1533 VS1.2: vx.2 = memref2 VS1.3
1534 VS1.3: vx.3 = memref3
1536 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1537 VSnew.1: vz1 = vx.1 + ... VSnew.2
1538 VSnew.2: vz2 = vx.2 + ... VSnew.3
1539 VSnew.3: vz3 = vx.3 + ...
1541 The vectorization of S1 is explained in vectorizable_load.
1542 The vectorization of S2:
1543 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1544 the function 'vect_get_vec_def_for_operand' is called to
1545 get the relevant vector-def for each operand of S2. For operand x it
1546 returns the vector-def 'vx.0'.
1548 To create the remaining copies of the vector-stmt (VSnew.j), this
1549 function is called to get the relevant vector-def for each operand. It is
1550 obtained from the respective VS1.j stmt, which is recorded in the
1551 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1553 For example, to obtain the vector-def 'vx.1' in order to create the
1554 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1555 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1556 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1557 and return its def ('vx.1').
1558 Overall, to create the above sequence this function will be called 3 times:
1559 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1560 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1561 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* NOTE(review): mangled extraction; comments only, code text unchanged.  */
1564 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1566 gimple vec_stmt_for_operand
;
1567 stmt_vec_info def_stmt_info
;
1569 /* Do nothing; can reuse same def. */
/* Invariant/constant defs are the same for every copy, so the caller's
   VEC_OPRND is returned unchanged (the `return vec_oprnd;' line was
   dropped by the extraction -- TODO confirm).  */
1570 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Walk from the def of VEC_OPRND to the next copy in the
   STMT_VINFO_RELATED_STMT chain and return that copy's def.  */
1573 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1574 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1575 gcc_assert (def_stmt_info
);
1576 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1577 gcc_assert (vec_stmt_for_operand
);
1578 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
/* For a PHI take the PHI result; otherwise the stmt's LHS.  */
1579 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1580 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1582 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1587 /* Get vectorized definitions for the operands to create a copy of an original
1588 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* NOTE(review): mangled extraction; comments only, code text unchanged.
   Pops the previous-copy defs off VEC_OPRNDS0 (and VEC_OPRNDS1 when
   present), advances each to the next copy via
   vect_get_vec_def_for_stmt_copy, and pushes the results back.  DT is the
   two-element array of operand def types.  */
1591 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1592 vec
<tree
> *vec_oprnds0
,
1593 vec
<tree
> *vec_oprnds1
)
1595 tree vec_oprnd
= vec_oprnds0
->pop ();
1597 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1598 vec_oprnds0
->quick_push (vec_oprnd
);
/* Second operand vector is optional.  */
1600 if (vec_oprnds1
&& vec_oprnds1
->length ())
1602 vec_oprnd
= vec_oprnds1
->pop ();
1603 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1604 vec_oprnds1
->quick_push (vec_oprnd
);
1609 /* Get vectorized definitions for OP0 and OP1.
1610 REDUC_INDEX is the index of reduction operand in case of reduction,
1611 and -1 otherwise. */
/* NOTE(review): mangled extraction -- the `if (slp_node)' / `else'
   skeleton around the two paths below was dropped; comments only, code
   text unchanged.  Fills VEC_OPRNDS0/VEC_OPRNDS1 either from the SLP tree
   (SLP path) or from single defs per operand (loop path).  */
1614 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1615 vec
<tree
> *vec_oprnds0
,
1616 vec
<tree
> *vec_oprnds1
,
1617 slp_tree slp_node
, int reduc_index
)
/* SLP path: gather defs for one or two operands at once.  */
1621 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1622 auto_vec
<tree
> ops (nops
);
1623 auto_vec
<vec
<tree
> > vec_defs (nops
);
1625 ops
.quick_push (op0
);
1627 ops
.quick_push (op1
);
1629 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
/* Hand ownership of the def vectors to the caller.  */
1631 *vec_oprnds0
= vec_defs
[0];
1633 *vec_oprnds1
= vec_defs
[1];
/* Non-SLP path: one vector def per operand for the first copy.  */
1639 vec_oprnds0
->create (1);
1640 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1641 vec_oprnds0
->quick_push (vec_oprnd
);
/* Second operand handled the same way when present.  */
1645 vec_oprnds1
->create (1);
1646 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1647 vec_oprnds1
->quick_push (vec_oprnd
);
1653 /* Function vect_finish_stmt_generation.
1655 Insert a new stmt. */
/* NOTE(review): mangled extraction; comments only, code text unchanged.
   Inserts VEC_STMT before *GSI, wiring up virtual SSA operands, stmt_vec
   info, source location, and EH region membership copied from STMT.  */
1658 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1659 gimple_stmt_iterator
*gsi
)
1661 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1662 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1663 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1665 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
/* If VEC_STMT touches memory, inherit the virtual use from the stmt we
   are inserting before.  */
1667 if (!gsi_end_p (*gsi
)
1668 && gimple_has_mem_ops (vec_stmt
))
1670 gimple at_stmt
= gsi_stmt (*gsi
);
1671 tree vuse
= gimple_vuse (at_stmt
);
1672 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1674 tree vdef
= gimple_vdef (at_stmt
);
1675 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1676 /* If we have an SSA vuse and insert a store, update virtual
1677 SSA form to avoid triggering the renamer. Do so only
1678 if we can easily see all uses - which is what almost always
1679 happens with the way vectorized stmts are inserted. */
1680 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1681 && ((is_gimple_assign (vec_stmt
)
1682 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1683 || (is_gimple_call (vec_stmt
)
1684 && !(gimple_call_flags (vec_stmt
)
1685 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* VEC_STMT is a store: give it a fresh vdef and rewrite the next stmt's
   vuse to consume it, keeping virtual SSA form valid in place.  */
1687 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1688 gimple_set_vdef (vec_stmt
, new_vdef
);
1689 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1693 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Register stmt_vec_info for the new stmt (trailing arguments of this
   call were dropped by the extraction).  */
1695 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1698 if (dump_enabled_p ())
1700 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1701 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1702 dump_printf (MSG_NOTE
, "\n");
/* Debug info: the vector stmt reports the scalar stmt's location.  */
1705 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1707 /* While EH edges will generally prevent vectorization, stmt might
1708 e.g. be in a must-not-throw region. Ensure newly created stmts
1709 that could throw are part of the same region. */
1710 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1711 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1712 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1715 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1716 a function declaration if the target has a vectorized version
1717 of the function, or NULL_TREE if the function cannot be vectorized. */
/* NOTE(review): mangled extraction; comments only, code text unchanged.
   The early `return NULL_TREE;' lines and the null-fndecl check head were
   dropped by the extraction.  */
1720 vectorizable_function (gcall
*call
, tree vectype_out
, tree vectype_in
)
1722 tree fndecl
= gimple_call_fndecl (call
);
1724 /* We only handle functions that do not read or clobber memory -- i.e.
1725 const or novops ones. */
1726 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
/* Only built-in FUNCTION_DECLs can have a target vectorized variant.  */
1730 || TREE_CODE (fndecl
) != FUNCTION_DECL
1731 || !DECL_BUILT_IN (fndecl
))
/* Ask the target hook for a vectorized counterpart (final argument of
   this call was dropped by the extraction).  */
1734 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
/* Forward declaration: permute_vec_elements is defined later in the file
   and is used below by vectorizable_mask_load_store to combine/reorder
   vector elements via a permute mask.  */
1739 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1740 gimple_stmt_iterator
*);
1743 /* Function vectorizable_mask_load_store.
1745 Check if STMT performs a conditional load or store that can be vectorized.
1746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1747 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1748 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): mangled extraction -- lines split mid-token; braces,
   `return' stmts, `else' keywords and some argument continuations were
   dropped throughout.  Comments added only; code text unchanged.
   Structure: analysis checks first; then, when VEC_STMT is non-null,
   one of three transform paths: gather, masked store, masked load.  */
1751 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1752 gimple
*vec_stmt
, slp_tree slp_node
)
1754 tree vec_dest
= NULL
;
1755 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1756 stmt_vec_info prev_stmt_info
;
1757 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1758 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1759 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1760 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1761 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1765 tree dataref_ptr
= NULL_TREE
;
1767 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1771 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1772 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1773 int gather_scale
= 1;
1774 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1779 enum vect_def_type dt
;
/* SLP of masked load/stores is not supported (the `return false;' body
   of this check was dropped by the extraction).  */
1781 if (slp_node
!= NULL
)
1784 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1785 gcc_assert (ncopies
>= 1);
/* The stmt is an IFN_MASK_LOAD or IFN_MASK_STORE internal call; arg 2 is
   the mask, whose precision must match the vector element size.  */
1787 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1788 mask
= gimple_call_arg (stmt
, 2);
1789 if (TYPE_PRECISION (TREE_TYPE (mask
))
1790 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1793 /* FORNOW. This restriction should be relaxed. */
1794 if (nested_in_vect_loop
&& ncopies
> 1)
1796 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1798 "multiple types in nested loop.")
;
/* Remaining analysis guards: stmt must be relevant, an internal def,
   and have a data reference; grouped and strided accesses bail out.  */
1802 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1805 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1808 if (!STMT_VINFO_DATA_REF (stmt_info
))
1811 elem_type
= TREE_TYPE (vectype
);
1813 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1816 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
/* Gather analysis: verify the gather builtin, that its offset operand is
   a simple use, and that the builtin's mask type is a vector type.  */
1819 if (STMT_VINFO_GATHER_P (stmt_info
))
1823 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1824 &gather_off
, &gather_scale
);
1825 gcc_assert (gather_decl
);
1826 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1827 &def_stmt
, &def
, &gather_dt
,
1828 &gather_off_vectype
))
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1832 "gather index use not simple.")
;
/* Fetch the gather builtin's mask argument type (4th in the arg list).  */
1836 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1838 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1839 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1843 "masked gather with integer mask not supported.")
;
/* Non-gather: reject non-positive DR step and targets without a masked
   load/store instruction for this mode.  */
1847 else if (tree_int_cst_compare (nested_in_vect_loop
1848 ? STMT_VINFO_DR_STEP (stmt_info
)
1849 : DR_STEP (dr
), size_zero_node
) <= 0)
1851 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1852 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
/* The mask itself must be a simple-use SSA name.  */
1855 if (TREE_CODE (mask
) != SSA_NAME
)
1858 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1859 &def_stmt
, &def
, &dt
))
/* For a store, the stored value (arg 3) must also be a simple use.  */
1864 tree rhs
= gimple_call_arg (stmt
, 3);
1865 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1866 &def_stmt
, &def
, &dt
))
/* Analysis-only invocation: record the vec_info type and the cost model
   entries, then return (the `return true;' was dropped).  */
1870 if (!vec_stmt
) /* transformation not required. */
1872 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1874 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1877 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
/* ----- Transform, path 1: masked gather load.  Builds calls to the
   target gather builtin, permuting offset/mask vectors when the number
   of elements differs between the data and offset vector types.  ----- */
1883 if (STMT_VINFO_GATHER_P (stmt_info
))
1885 tree vec_oprnd0
= NULL_TREE
, op
;
1886 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1887 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1888 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1889 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1890 tree mask_perm_mask
= NULL_TREE
;
1891 edge pe
= loop_preheader_edge (loop
);
1894 enum { NARROW
, NONE
, WIDEN
} modifier
;
1895 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
/* Decompose the gather builtin's signature: (src, ptr, idx, mask, scale).  */
1897 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1898 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1899 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1900 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1901 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1902 scaletype
= TREE_VALUE (arglist
);
1903 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1904 && types_compatible_p (srctype
, masktype
));
/* Choose the widening/narrowing mode from the ratio of data elements to
   offset elements, and precompute the needed permute masks.  */
1906 if (nunits
== gather_off_nunits
)
1908 else if (nunits
== gather_off_nunits
/ 2)
1910 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1913 for (i
= 0; i
< gather_off_nunits
; ++i
)
1914 sel
[i
] = i
| nunits
;
1916 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
1917 gcc_assert (perm_mask
!= NULL_TREE
);
1919 else if (nunits
== gather_off_nunits
* 2)
1921 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1924 for (i
= 0; i
< nunits
; ++i
)
1925 sel
[i
] = i
< gather_off_nunits
1926 ? i
: i
+ nunits
- gather_off_nunits
;
1928 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
1929 gcc_assert (perm_mask
!= NULL_TREE
);
1931 for (i
= 0; i
< nunits
; ++i
)
1932 sel
[i
] = i
| gather_off_nunits
;
1933 mask_perm_mask
= vect_gen_perm_mask (masktype
, sel
);
1934 gcc_assert (mask_perm_mask
!= NULL_TREE
);
1939 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
/* Force the base pointer to a gimple value on the preheader edge.  */
1941 ptr
= fold_convert (ptrtype
, gather_base
);
1942 if (!is_gimple_min_invariant (ptr
))
1944 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1945 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1946 gcc_assert (!new_bb
);
1949 scale
= build_int_cst (scaletype
, gather_scale
);
/* Main copy loop: emit one gather builtin call per copy, chaining copies
   through STMT_VINFO_RELATED_STMT.  */
1951 prev_stmt_info
= NULL
;
1952 for (j
= 0; j
< ncopies
; ++j
)
/* Offset operand: permute the previous copy on odd WIDEN iterations,
   otherwise fetch the first/next vector def.  */
1954 if (modifier
== WIDEN
&& (j
& 1))
1955 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1956 perm_mask
, stmt
, gsi
);
1959 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1962 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
/* View-convert the offset vector to the builtin's index type when the
   types differ only in representation.  */
1964 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1967 == TYPE_VECTOR_SUBPARTS (idxtype
));
1968 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1969 var
= make_ssa_name (var
, NULL
);
1970 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1972 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
1974 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Mask operand: same permute-or-fetch pattern as the offset.  */
1978 if (mask_perm_mask
&& (j
& 1))
1979 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1980 mask_perm_mask
, stmt
, gsi
);
1984 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
1987 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
1988 &def_stmt
, &def
, &dt
);
1989 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
/* View-convert the mask to the builtin's mask type when needed.  */
1993 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1995 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1996 == TYPE_VECTOR_SUBPARTS (masktype
));
1997 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
1999 var
= make_ssa_name (var
, NULL
);
2000 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2002 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
2003 mask_op
, NULL_TREE
);
2004 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Emit the gather call: mask doubles as the pass-through source so
   inactive lanes keep the mask value (which is then discarded).  */
2010 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
/* If the builtin returns a different (but same-size) vector type,
   view-convert the result back to VECTYPE.  */
2013 if (!useless_type_conversion_p (vectype
, rettype
))
2015 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2016 == TYPE_VECTOR_SUBPARTS (rettype
));
2017 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2018 op
= make_ssa_name (var
, new_stmt
);
2019 gimple_call_set_lhs (new_stmt
, op
);
2020 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2021 var
= make_ssa_name (vec_dest
, NULL
);
2022 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2024 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
2029 var
= make_ssa_name (vec_dest
, new_stmt
);
2030 gimple_call_set_lhs (new_stmt
, var
);
2033 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NARROW mode: pairs of gather results are merged with a permute; odd
   iterations combine with the saved previous result.  */
2035 if (modifier
== NARROW
)
2042 var
= permute_vec_elements (prev_res
, var
,
2043 perm_mask
, stmt
, gsi
);
2044 new_stmt
= SSA_NAME_DEF_STMT (var
);
/* Chain the copies: first copy goes into STMT_VINFO_VEC_STMT, later
   copies hang off the previous copy's RELATED_STMT.  */
2047 if (prev_stmt_info
== NULL
)
2048 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2050 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2051 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2054 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
/* Replace the scalar MASK_LOAD with `lhs = 0' so it cannot survive to
   expansion; stmt_info is transferred to the replacement.  */
2056 tree lhs
= gimple_call_lhs (stmt
);
2057 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2058 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2059 set_vinfo_for_stmt (stmt
, NULL
);
2060 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2061 gsi_replace (gsi
, new_stmt
, true);
/* ----- Transform, path 2: masked store.  One IFN_MASK_STORE per copy,
   bumping the data-ref pointer between copies.  ----- */
2066 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2067 prev_stmt_info
= NULL
;
2068 for (i
= 0; i
< ncopies
; i
++)
2070 unsigned align
, misalign
;
/* First copy: create vector defs for the stored value and the mask, and
   the initial data-ref pointer.  */
2074 tree rhs
= gimple_call_arg (stmt
, 3);
2075 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2076 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2077 /* We should have catched mismatched types earlier. */
2078 gcc_assert (useless_type_conversion_p (vectype
,
2079 TREE_TYPE (vec_rhs
)));
2080 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2081 NULL_TREE
, &dummy
, gsi
,
2082 &ptr_incr
, false, &inv_p
);
2083 gcc_assert (!inv_p
);
/* Subsequent copies: advance both vector defs and bump the pointer by
   one vector's worth of bytes.  */
2087 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2089 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2090 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2092 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2093 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2094 TYPE_SIZE_UNIT (vectype
));
/* Record alignment info on the pointer for the expander.  */
2097 align
= TYPE_ALIGN_UNIT (vectype
);
2098 if (aligned_access_p (dr
))
2100 else if (DR_MISALIGNMENT (dr
) == -1)
2102 align
= TYPE_ALIGN_UNIT (elem_type
);
2106 misalign
= DR_MISALIGNMENT (dr
);
2107 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
/* Emit the masked store (trailing arguments of this build call were
   dropped by the extraction -- presumably vec_mask and vec_rhs).  */
2110 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2111 gimple_call_arg (stmt
, 1),
2113 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2115 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2117 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2118 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* ----- Transform, path 3: masked load.  One IFN_MASK_LOAD per copy,
   same pointer-bumping and chaining scheme as the store path.  ----- */
2123 tree vec_mask
= NULL_TREE
;
2124 prev_stmt_info
= NULL
;
2125 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2126 for (i
= 0; i
< ncopies
; i
++)
2128 unsigned align
, misalign
;
2132 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2133 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2134 NULL_TREE
, &dummy
, gsi
,
2135 &ptr_incr
, false, &inv_p
);
2136 gcc_assert (!inv_p
);
2140 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2142 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2143 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2144 TYPE_SIZE_UNIT (vectype
));
2147 align
= TYPE_ALIGN_UNIT (vectype
);
2148 if (aligned_access_p (dr
))
2150 else if (DR_MISALIGNMENT (dr
) == -1)
2152 align
= TYPE_ALIGN_UNIT (elem_type
);
2156 misalign
= DR_MISALIGNMENT (dr
);
2157 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2160 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2161 gimple_call_arg (stmt
, 1),
2163 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
, NULL
));
2164 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2166 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2168 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2169 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2175 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
/* Same scalar-stmt replacement as at the end of the gather path.  */
2177 tree lhs
= gimple_call_lhs (stmt
);
2178 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2179 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2180 set_vinfo_for_stmt (stmt
, NULL
);
2181 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2182 gsi_replace (gsi
, new_stmt
, true);
2189 /* Function vectorizable_call.
2191 Check if GS performs a function call that can be vectorized.
2192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2197 vectorizable_call (gimple gs
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2204 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2205 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2206 tree vectype_out
, vectype_in
;
2209 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2210 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2211 tree fndecl
, new_temp
, def
, rhs_type
;
2213 enum vect_def_type dt
[3]
2214 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2215 gimple new_stmt
= NULL
;
2217 vec
<tree
> vargs
= vNULL
;
2218 enum { NARROW
, NONE
, WIDEN
} modifier
;
2222 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2225 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2228 /* Is GS a vectorizable call? */
2229 stmt
= dyn_cast
<gcall
*> (gs
);
2233 if (gimple_call_internal_p (stmt
)
2234 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2235 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2236 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2239 if (gimple_call_lhs (stmt
) == NULL_TREE
2240 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2243 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2245 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2247 /* Process function arguments. */
2248 rhs_type
= NULL_TREE
;
2249 vectype_in
= NULL_TREE
;
2250 nargs
= gimple_call_num_args (stmt
);
2252 /* Bail out if the function has more than three arguments, we do not have
2253 interesting builtin functions to vectorize with more than two arguments
2254 except for fma. No arguments is also not good. */
2255 if (nargs
== 0 || nargs
> 3)
2258 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2259 if (gimple_call_internal_p (stmt
)
2260 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2263 rhs_type
= unsigned_type_node
;
2266 for (i
= 0; i
< nargs
; i
++)
2270 op
= gimple_call_arg (stmt
, i
);
2272 /* We can only handle calls with arguments of the same type. */
2274 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2276 if (dump_enabled_p ())
2277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2278 "argument types differ.\n");
2282 rhs_type
= TREE_TYPE (op
);
2284 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2285 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2287 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2289 "use not simple.\n");
2294 vectype_in
= opvectype
;
2296 && opvectype
!= vectype_in
)
2298 if (dump_enabled_p ())
2299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2300 "argument vector types differ.\n");
2304 /* If all arguments are external or constant defs use a vector type with
2305 the same size as the output vector type. */
2307 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2309 gcc_assert (vectype_in
);
2312 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2315 "no vectype for scalar type ");
2316 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2317 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2324 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2325 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2326 if (nunits_in
== nunits_out
/ 2)
2328 else if (nunits_out
== nunits_in
)
2330 else if (nunits_out
== nunits_in
/ 2)
2335 /* For now, we only vectorize functions if a target specific builtin
2336 is available. TODO -- in some cases, it might be profitable to
2337 insert the calls for pieces of the vector, in order to be able
2338 to vectorize other operations in the loop. */
2339 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2340 if (fndecl
== NULL_TREE
)
2342 if (gimple_call_internal_p (stmt
)
2343 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2346 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2347 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2348 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2349 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2351 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2352 { 0, 1, 2, ... vf - 1 } vector. */
2353 gcc_assert (nargs
== 0);
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2359 "function is not vectorizable.\n");
2364 gcc_assert (!gimple_vuse (stmt
));
2366 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2368 else if (modifier
== NARROW
)
2369 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2371 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2373 /* Sanity check: make sure that at least one copy of the vectorized stmt
2374 needs to be generated. */
2375 gcc_assert (ncopies
>= 1);
2377 if (!vec_stmt
) /* transformation not required. */
2379 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2383 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2393 scalar_dest
= gimple_call_lhs (stmt
);
2394 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2396 prev_stmt_info
= NULL
;
2400 for (j
= 0; j
< ncopies
; ++j
)
2402 /* Build argument list for the vectorized call. */
2404 vargs
.create (nargs
);
2410 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2411 vec
<tree
> vec_oprnds0
;
2413 for (i
= 0; i
< nargs
; i
++)
2414 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2415 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2416 vec_oprnds0
= vec_defs
[0];
2418 /* Arguments are ready. Create the new vector stmt. */
2419 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2422 for (k
= 0; k
< nargs
; k
++)
2424 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2425 vargs
[k
] = vec_oprndsk
[i
];
2427 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2428 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2429 gimple_call_set_lhs (new_stmt
, new_temp
);
2430 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2431 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2434 for (i
= 0; i
< nargs
; i
++)
2436 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2437 vec_oprndsi
.release ();
2442 for (i
= 0; i
< nargs
; i
++)
2444 op
= gimple_call_arg (stmt
, i
);
2447 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2450 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2452 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2455 vargs
.quick_push (vec_oprnd0
);
2458 if (gimple_call_internal_p (stmt
)
2459 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2461 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2463 for (k
= 0; k
< nunits_out
; ++k
)
2464 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2465 tree cst
= build_vector (vectype_out
, v
);
2467 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2468 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2469 new_temp
= make_ssa_name (new_var
, init_stmt
);
2470 gimple_assign_set_lhs (init_stmt
, new_temp
);
2471 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2472 new_temp
= make_ssa_name (vec_dest
, NULL
);
2473 new_stmt
= gimple_build_assign (new_temp
,
2474 gimple_assign_lhs (init_stmt
));
2478 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2479 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2480 gimple_call_set_lhs (new_stmt
, new_temp
);
2482 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2485 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2487 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2489 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2495 for (j
= 0; j
< ncopies
; ++j
)
2497 /* Build argument list for the vectorized call. */
2499 vargs
.create (nargs
* 2);
2505 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2506 vec
<tree
> vec_oprnds0
;
2508 for (i
= 0; i
< nargs
; i
++)
2509 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2510 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2511 vec_oprnds0
= vec_defs
[0];
2513 /* Arguments are ready. Create the new vector stmt. */
2514 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2518 for (k
= 0; k
< nargs
; k
++)
2520 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2521 vargs
.quick_push (vec_oprndsk
[i
]);
2522 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2524 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2525 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2526 gimple_call_set_lhs (new_stmt
, new_temp
);
2527 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2528 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2531 for (i
= 0; i
< nargs
; i
++)
2533 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2534 vec_oprndsi
.release ();
2539 for (i
= 0; i
< nargs
; i
++)
2541 op
= gimple_call_arg (stmt
, i
);
2545 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2547 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2551 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2553 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2555 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2558 vargs
.quick_push (vec_oprnd0
);
2559 vargs
.quick_push (vec_oprnd1
);
2562 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2563 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2564 gimple_call_set_lhs (new_stmt
, new_temp
);
2565 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2568 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2570 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2572 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2575 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2580 /* No current target implements this case. */
2586 /* The call in STMT might prevent it from being removed in dce.
2587 We however cannot remove it here, due to the way the ssa name
2588 it defines is mapped to the new definition. So just replace
2589 rhs of the statement with something harmless. */
2594 type
= TREE_TYPE (scalar_dest
);
2595 if (is_pattern_stmt_p (stmt_info
))
2596 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2598 lhs
= gimple_call_lhs (stmt
);
2599 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2600 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2601 set_vinfo_for_stmt (stmt
, NULL
);
2602 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2603 gsi_replace (gsi
, new_stmt
, false);
2609 struct simd_call_arg_info
2613 enum vect_def_type dt
;
2614 HOST_WIDE_INT linear_step
;
2618 /* Function vectorizable_simd_clone_call.
2620 Check if STMT performs a function call that can be vectorized
2621 by calling a simd clone of the function.
2622 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2623 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2624 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2627 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2628 gimple
*vec_stmt
, slp_tree slp_node
)
2633 tree vec_oprnd0
= NULL_TREE
;
2634 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2636 unsigned int nunits
;
2637 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2638 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2639 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2640 tree fndecl
, new_temp
, def
;
2642 gimple new_stmt
= NULL
;
2644 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2645 vec
<tree
> vargs
= vNULL
;
2647 tree lhs
, rtype
, ratype
;
2648 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2650 /* Is STMT a vectorizable call? */
2651 if (!is_gimple_call (stmt
))
2654 fndecl
= gimple_call_fndecl (stmt
);
2655 if (fndecl
== NULL_TREE
)
2658 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2659 if (node
== NULL
|| node
->simd_clones
== NULL
)
2662 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2665 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2668 if (gimple_call_lhs (stmt
)
2669 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2672 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2674 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2676 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2680 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2683 /* Process function arguments. */
2684 nargs
= gimple_call_num_args (stmt
);
2686 /* Bail out if the function has zero arguments. */
2690 arginfo
.create (nargs
);
2692 for (i
= 0; i
< nargs
; i
++)
2694 simd_call_arg_info thisarginfo
;
2697 thisarginfo
.linear_step
= 0;
2698 thisarginfo
.align
= 0;
2699 thisarginfo
.op
= NULL_TREE
;
2701 op
= gimple_call_arg (stmt
, i
);
2702 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2703 &def_stmt
, &def
, &thisarginfo
.dt
,
2704 &thisarginfo
.vectype
)
2705 || thisarginfo
.dt
== vect_uninitialized_def
)
2707 if (dump_enabled_p ())
2708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2709 "use not simple.\n");
2714 if (thisarginfo
.dt
== vect_constant_def
2715 || thisarginfo
.dt
== vect_external_def
)
2716 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2718 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2720 if (thisarginfo
.dt
!= vect_constant_def
2721 && thisarginfo
.dt
!= vect_external_def
2723 && TREE_CODE (op
) == SSA_NAME
2724 && simple_iv (loop
, loop_containing_stmt (stmt
), op
, &iv
, false)
2725 && tree_fits_shwi_p (iv
.step
))
2727 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2728 thisarginfo
.op
= iv
.base
;
2730 else if ((thisarginfo
.dt
== vect_constant_def
2731 || thisarginfo
.dt
== vect_external_def
)
2732 && POINTER_TYPE_P (TREE_TYPE (op
)))
2733 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2735 arginfo
.quick_push (thisarginfo
);
2738 unsigned int badness
= 0;
2739 struct cgraph_node
*bestn
= NULL
;
2740 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
))
2741 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
));
2743 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2744 n
= n
->simdclone
->next_clone
)
2746 unsigned int this_badness
= 0;
2747 if (n
->simdclone
->simdlen
2748 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2749 || n
->simdclone
->nargs
!= nargs
)
2751 if (n
->simdclone
->simdlen
2752 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2753 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2754 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2755 if (n
->simdclone
->inbranch
)
2756 this_badness
+= 2048;
2757 int target_badness
= targetm
.simd_clone
.usable (n
);
2758 if (target_badness
< 0)
2760 this_badness
+= target_badness
* 512;
2761 /* FORNOW: Have to add code to add the mask argument. */
2762 if (n
->simdclone
->inbranch
)
2764 for (i
= 0; i
< nargs
; i
++)
2766 switch (n
->simdclone
->args
[i
].arg_type
)
2768 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2769 if (!useless_type_conversion_p
2770 (n
->simdclone
->args
[i
].orig_type
,
2771 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2773 else if (arginfo
[i
].dt
== vect_constant_def
2774 || arginfo
[i
].dt
== vect_external_def
2775 || arginfo
[i
].linear_step
)
2778 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2779 if (arginfo
[i
].dt
!= vect_constant_def
2780 && arginfo
[i
].dt
!= vect_external_def
)
2783 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2784 if (arginfo
[i
].dt
== vect_constant_def
2785 || arginfo
[i
].dt
== vect_external_def
2786 || (arginfo
[i
].linear_step
2787 != n
->simdclone
->args
[i
].linear_step
))
2790 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2794 case SIMD_CLONE_ARG_TYPE_MASK
:
2797 if (i
== (size_t) -1)
2799 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2804 if (arginfo
[i
].align
)
2805 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2806 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2808 if (i
== (size_t) -1)
2810 if (bestn
== NULL
|| this_badness
< badness
)
2813 badness
= this_badness
;
2823 for (i
= 0; i
< nargs
; i
++)
2824 if ((arginfo
[i
].dt
== vect_constant_def
2825 || arginfo
[i
].dt
== vect_external_def
)
2826 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2829 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2831 if (arginfo
[i
].vectype
== NULL
2832 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2833 > bestn
->simdclone
->simdlen
))
2840 fndecl
= bestn
->decl
;
2841 nunits
= bestn
->simdclone
->simdlen
;
2842 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2844 /* If the function isn't const, only allow it in simd loops where user
2845 has asserted that at least nunits consecutive iterations can be
2846 performed using SIMD instructions. */
2847 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2848 && gimple_vuse (stmt
))
2854 /* Sanity check: make sure that at least one copy of the vectorized stmt
2855 needs to be generated. */
2856 gcc_assert (ncopies
>= 1);
2858 if (!vec_stmt
) /* transformation not required. */
2860 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
) = bestn
->decl
;
2861 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_NOTE
, vect_location
,
2864 "=== vectorizable_simd_clone_call ===\n");
2865 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2872 if (dump_enabled_p ())
2873 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2876 scalar_dest
= gimple_call_lhs (stmt
);
2877 vec_dest
= NULL_TREE
;
2882 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2883 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2884 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2887 rtype
= TREE_TYPE (ratype
);
2891 prev_stmt_info
= NULL
;
2892 for (j
= 0; j
< ncopies
; ++j
)
2894 /* Build argument list for the vectorized call. */
2896 vargs
.create (nargs
);
2900 for (i
= 0; i
< nargs
; i
++)
2902 unsigned int k
, l
, m
, o
;
2904 op
= gimple_call_arg (stmt
, i
);
2905 switch (bestn
->simdclone
->args
[i
].arg_type
)
2907 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2908 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2909 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2910 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2912 if (TYPE_VECTOR_SUBPARTS (atype
)
2913 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2915 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2916 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2917 / TYPE_VECTOR_SUBPARTS (atype
));
2918 gcc_assert ((k
& (k
- 1)) == 0);
2921 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2924 vec_oprnd0
= arginfo
[i
].op
;
2925 if ((m
& (k
- 1)) == 0)
2927 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2930 arginfo
[i
].op
= vec_oprnd0
;
2932 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2934 bitsize_int ((m
& (k
- 1)) * prec
));
2936 = gimple_build_assign (make_ssa_name (atype
, NULL
),
2938 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2939 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2943 k
= (TYPE_VECTOR_SUBPARTS (atype
)
2944 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
2945 gcc_assert ((k
& (k
- 1)) == 0);
2946 vec
<constructor_elt
, va_gc
> *ctor_elts
;
2948 vec_alloc (ctor_elts
, k
);
2951 for (l
= 0; l
< k
; l
++)
2953 if (m
== 0 && l
== 0)
2955 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2958 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2960 arginfo
[i
].op
= vec_oprnd0
;
2963 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
2967 vargs
.safe_push (vec_oprnd0
);
2970 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
2972 = gimple_build_assign (make_ssa_name (atype
, NULL
),
2974 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2975 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2980 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2981 vargs
.safe_push (op
);
2983 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2988 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
2993 edge pe
= loop_preheader_edge (loop
);
2994 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2995 gcc_assert (!new_bb
);
2997 tree phi_res
= copy_ssa_name (op
, NULL
);
2998 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
2999 set_vinfo_for_stmt (new_phi
,
3000 new_stmt_vec_info (new_phi
, loop_vinfo
,
3002 add_phi_arg (new_phi
, arginfo
[i
].op
,
3003 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3005 = POINTER_TYPE_P (TREE_TYPE (op
))
3006 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3007 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3008 ? sizetype
: TREE_TYPE (op
);
3010 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3012 tree tcst
= wide_int_to_tree (type
, cst
);
3013 tree phi_arg
= copy_ssa_name (op
, NULL
);
3014 new_stmt
= gimple_build_assign_with_ops (code
, phi_arg
,
3016 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3017 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3018 set_vinfo_for_stmt (new_stmt
,
3019 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3021 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3023 arginfo
[i
].op
= phi_res
;
3024 vargs
.safe_push (phi_res
);
3029 = POINTER_TYPE_P (TREE_TYPE (op
))
3030 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3031 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3032 ? sizetype
: TREE_TYPE (op
);
3034 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3036 tree tcst
= wide_int_to_tree (type
, cst
);
3037 new_temp
= make_ssa_name (TREE_TYPE (op
), NULL
);
3039 = gimple_build_assign_with_ops (code
, new_temp
,
3040 arginfo
[i
].op
, tcst
);
3041 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3042 vargs
.safe_push (new_temp
);
3045 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3051 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3054 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3056 new_temp
= create_tmp_var (ratype
, NULL
);
3057 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3058 == TYPE_VECTOR_SUBPARTS (rtype
))
3059 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3061 new_temp
= make_ssa_name (rtype
, new_stmt
);
3062 gimple_call_set_lhs (new_stmt
, new_temp
);
3064 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3068 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3071 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3072 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3073 gcc_assert ((k
& (k
- 1)) == 0);
3074 for (l
= 0; l
< k
; l
++)
3079 t
= build_fold_addr_expr (new_temp
);
3080 t
= build2 (MEM_REF
, vectype
, t
,
3081 build_int_cst (TREE_TYPE (t
),
3082 l
* prec
/ BITS_PER_UNIT
));
3085 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3086 size_int (prec
), bitsize_int (l
* prec
));
3088 = gimple_build_assign (make_ssa_name (vectype
, NULL
), t
);
3089 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3090 if (j
== 0 && l
== 0)
3091 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3093 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3095 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3100 tree clobber
= build_constructor (ratype
, NULL
);
3101 TREE_THIS_VOLATILE (clobber
) = 1;
3102 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3103 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3107 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3109 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3110 / TYPE_VECTOR_SUBPARTS (rtype
));
3111 gcc_assert ((k
& (k
- 1)) == 0);
3112 if ((j
& (k
- 1)) == 0)
3113 vec_alloc (ret_ctor_elts
, k
);
3116 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3117 for (m
= 0; m
< o
; m
++)
3119 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3120 size_int (m
), NULL_TREE
, NULL_TREE
);
3122 = gimple_build_assign (make_ssa_name (rtype
, NULL
),
3124 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3125 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3126 gimple_assign_lhs (new_stmt
));
3128 tree clobber
= build_constructor (ratype
, NULL
);
3129 TREE_THIS_VOLATILE (clobber
) = 1;
3130 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3131 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3134 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3135 if ((j
& (k
- 1)) != k
- 1)
3137 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3139 = gimple_build_assign (make_ssa_name (vec_dest
, NULL
),
3141 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3143 if ((unsigned) j
== k
- 1)
3144 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3146 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3148 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3153 tree t
= build_fold_addr_expr (new_temp
);
3154 t
= build2 (MEM_REF
, vectype
, t
,
3155 build_int_cst (TREE_TYPE (t
), 0));
3157 = gimple_build_assign (make_ssa_name (vec_dest
, NULL
), t
);
3158 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3159 tree clobber
= build_constructor (ratype
, NULL
);
3160 TREE_THIS_VOLATILE (clobber
) = 1;
3161 vect_finish_stmt_generation (stmt
,
3162 gimple_build_assign (new_temp
,
3168 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3170 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3172 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3177 /* The call in STMT might prevent it from being removed in dce.
3178 We however cannot remove it here, due to the way the ssa name
3179 it defines is mapped to the new definition. So just replace
3180 rhs of the statement with something harmless. */
3187 type
= TREE_TYPE (scalar_dest
);
3188 if (is_pattern_stmt_p (stmt_info
))
3189 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3191 lhs
= gimple_call_lhs (stmt
);
3192 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3195 new_stmt
= gimple_build_nop ();
3196 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3197 set_vinfo_for_stmt (stmt
, NULL
);
3198 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3199 gsi_replace (gsi
, new_stmt
, false);
3200 unlink_stmt_vdef (stmt
);
3206 /* Function vect_gen_widened_results_half
3208 Create a vector stmt whose code, type, number of arguments, and result
3209 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3210 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3211 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3212 needs to be created (DECL is a function-decl of a target-builtin).
3213 STMT is the original scalar stmt that we are vectorizing. */
3216 vect_gen_widened_results_half (enum tree_code code
,
3218 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3219 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3225 /* Generate half of the widened result: */
3226 if (code
== CALL_EXPR
)
3228 /* Target specific support */
3229 if (op_type
== binary_op
)
3230 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3232 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3233 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3234 gimple_call_set_lhs (new_stmt
, new_temp
);
3238 /* Generic support */
3239 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3240 if (op_type
!= binary_op
)
3242 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
3244 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3245 gimple_assign_set_lhs (new_stmt
, new_temp
);
3247 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3253 /* Get vectorized definitions for loop-based vectorization. For the first
3254 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3255 scalar operand), and for the rest we get a copy with
3256 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3257 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3258 The vectors are collected into VEC_OPRNDS. */
3261 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3262 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3266 /* Get first vector operand. */
3267 /* All the vector operands except the very first one (that is scalar oprnd)
3269 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3270 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3272 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3274 vec_oprnds
->quick_push (vec_oprnd
);
3276 /* Get second vector operand. */
3277 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3278 vec_oprnds
->quick_push (vec_oprnd
);
3282 /* For conversion in multiple steps, continue to get operands
3285 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3289 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3290 For multi-step conversions store the resulting vectors and call the function
3294 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3295 int multi_step_cvt
, gimple stmt
,
3297 gimple_stmt_iterator
*gsi
,
3298 slp_tree slp_node
, enum tree_code code
,
3299 stmt_vec_info
*prev_stmt_info
)
3302 tree vop0
, vop1
, new_tmp
, vec_dest
;
3304 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3306 vec_dest
= vec_dsts
.pop ();
3308 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3310 /* Create demotion operation. */
3311 vop0
= (*vec_oprnds
)[i
];
3312 vop1
= (*vec_oprnds
)[i
+ 1];
3313 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3314 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3315 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3316 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3319 /* Store the resulting vector for next recursive call. */
3320 (*vec_oprnds
)[i
/2] = new_tmp
;
3323 /* This is the last step of the conversion sequence. Store the
3324 vectors in SLP_NODE or in vector info of the scalar statement
3325 (or in STMT_VINFO_RELATED_STMT chain). */
3327 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3330 if (!*prev_stmt_info
)
3331 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3333 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3335 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3340 /* For multi-step demotion operations we first generate demotion operations
3341 from the source type to the intermediate types, and then combine the
3342 results (stored in VEC_OPRNDS) in demotion operation to the destination
3346 /* At each level of recursion we have half of the operands we had at the
3348 vec_oprnds
->truncate ((i
+1)/2);
3349 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3350 stmt
, vec_dsts
, gsi
, slp_node
,
3351 VEC_PACK_TRUNC_EXPR
,
3355 vec_dsts
.quick_push (vec_dest
);
3359 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3360 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3361 the resulting vectors and call the function recursively. */
3364 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3365 vec
<tree
> *vec_oprnds1
,
3366 gimple stmt
, tree vec_dest
,
3367 gimple_stmt_iterator
*gsi
,
3368 enum tree_code code1
,
3369 enum tree_code code2
, tree decl1
,
3370 tree decl2
, int op_type
)
3373 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3374 gimple new_stmt1
, new_stmt2
;
3375 vec
<tree
> vec_tmp
= vNULL
;
3377 vec_tmp
.create (vec_oprnds0
->length () * 2);
3378 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3380 if (op_type
== binary_op
)
3381 vop1
= (*vec_oprnds1
)[i
];
3385 /* Generate the two halves of promotion operation. */
3386 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3387 op_type
, vec_dest
, gsi
, stmt
);
3388 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3389 op_type
, vec_dest
, gsi
, stmt
);
3390 if (is_gimple_call (new_stmt1
))
3392 new_tmp1
= gimple_call_lhs (new_stmt1
);
3393 new_tmp2
= gimple_call_lhs (new_stmt2
);
3397 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3398 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3401 /* Store the results for the next step. */
3402 vec_tmp
.quick_push (new_tmp1
);
3403 vec_tmp
.quick_push (new_tmp2
);
3406 vec_oprnds0
->release ();
3407 *vec_oprnds0
= vec_tmp
;
3411 /* Check if STMT performs a conversion operation, that can be vectorized.
3412 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3413 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3414 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3417 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3418 gimple
*vec_stmt
, slp_tree slp_node
)
3422 tree op0
, op1
= NULL_TREE
;
3423 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3424 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3425 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3426 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3427 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3428 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3432 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3433 gimple new_stmt
= NULL
;
3434 stmt_vec_info prev_stmt_info
;
3437 tree vectype_out
, vectype_in
;
3439 tree lhs_type
, rhs_type
;
3440 enum { NARROW
, NONE
, WIDEN
} modifier
;
3441 vec
<tree
> vec_oprnds0
= vNULL
;
3442 vec
<tree
> vec_oprnds1
= vNULL
;
3444 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3445 int multi_step_cvt
= 0;
3446 vec
<tree
> vec_dsts
= vNULL
;
3447 vec
<tree
> interm_types
= vNULL
;
3448 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3450 enum machine_mode rhs_mode
;
3451 unsigned short fltsz
;
3453 /* Is STMT a vectorizable conversion? */
3455 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3458 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3461 if (!is_gimple_assign (stmt
))
3464 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3467 code
= gimple_assign_rhs_code (stmt
);
3468 if (!CONVERT_EXPR_CODE_P (code
)
3469 && code
!= FIX_TRUNC_EXPR
3470 && code
!= FLOAT_EXPR
3471 && code
!= WIDEN_MULT_EXPR
3472 && code
!= WIDEN_LSHIFT_EXPR
)
3475 op_type
= TREE_CODE_LENGTH (code
);
3477 /* Check types of lhs and rhs. */
3478 scalar_dest
= gimple_assign_lhs (stmt
);
3479 lhs_type
= TREE_TYPE (scalar_dest
);
3480 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3482 op0
= gimple_assign_rhs1 (stmt
);
3483 rhs_type
= TREE_TYPE (op0
);
3485 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3486 && !((INTEGRAL_TYPE_P (lhs_type
)
3487 && INTEGRAL_TYPE_P (rhs_type
))
3488 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3489 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3492 if ((INTEGRAL_TYPE_P (lhs_type
)
3493 && (TYPE_PRECISION (lhs_type
)
3494 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3495 || (INTEGRAL_TYPE_P (rhs_type
)
3496 && (TYPE_PRECISION (rhs_type
)
3497 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3501 "type conversion to/from bit-precision unsupported."
3506 /* Check the operands of the operation. */
3507 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3508 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3510 if (dump_enabled_p ())
3511 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3512 "use not simple.\n");
3515 if (op_type
== binary_op
)
3519 op1
= gimple_assign_rhs2 (stmt
);
3520 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3521 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3523 if (CONSTANT_CLASS_P (op0
))
3524 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3525 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3527 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3532 if (dump_enabled_p ())
3533 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3534 "use not simple.\n");
3539 /* If op0 is an external or constant defs use a vector type of
3540 the same size as the output vector type. */
3542 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3544 gcc_assert (vectype_in
);
3547 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3550 "no vectype for scalar type ");
3551 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3552 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3558 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3559 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3560 if (nunits_in
< nunits_out
)
3562 else if (nunits_out
== nunits_in
)
3567 /* Multiple types in SLP are handled by creating the appropriate number of
3568 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3570 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3572 else if (modifier
== NARROW
)
3573 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3575 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3577 /* Sanity check: make sure that at least one copy of the vectorized stmt
3578 needs to be generated. */
3579 gcc_assert (ncopies
>= 1);
3581 /* Supportable by target? */
3585 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3587 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3592 if (dump_enabled_p ())
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3594 "conversion not supported by target.\n");
3598 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3599 &code1
, &code2
, &multi_step_cvt
,
3602 /* Binary widening operation can only be supported directly by the
3604 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3608 if (code
!= FLOAT_EXPR
3609 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3610 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3613 rhs_mode
= TYPE_MODE (rhs_type
);
3614 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3615 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3616 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3617 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3620 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3621 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3622 if (cvt_type
== NULL_TREE
)
3625 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3627 if (!supportable_convert_operation (code
, vectype_out
,
3628 cvt_type
, &decl1
, &codecvt1
))
3631 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3632 cvt_type
, &codecvt1
,
3633 &codecvt2
, &multi_step_cvt
,
3637 gcc_assert (multi_step_cvt
== 0);
3639 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3640 vectype_in
, &code1
, &code2
,
3641 &multi_step_cvt
, &interm_types
))
3645 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3648 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3649 codecvt2
= ERROR_MARK
;
3653 interm_types
.safe_push (cvt_type
);
3654 cvt_type
= NULL_TREE
;
3659 gcc_assert (op_type
== unary_op
);
3660 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3661 &code1
, &multi_step_cvt
,
3665 if (code
!= FIX_TRUNC_EXPR
3666 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3667 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3670 rhs_mode
= TYPE_MODE (rhs_type
);
3672 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3673 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3674 if (cvt_type
== NULL_TREE
)
3676 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3679 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3680 &code1
, &multi_step_cvt
,
3689 if (!vec_stmt
) /* transformation not required. */
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_NOTE
, vect_location
,
3693 "=== vectorizable_conversion ===\n");
3694 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3696 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3697 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3699 else if (modifier
== NARROW
)
3701 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3702 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3706 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3707 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3709 interm_types
.release ();
3714 if (dump_enabled_p ())
3715 dump_printf_loc (MSG_NOTE
, vect_location
,
3716 "transform conversion. ncopies = %d.\n", ncopies
);
3718 if (op_type
== binary_op
)
3720 if (CONSTANT_CLASS_P (op0
))
3721 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3722 else if (CONSTANT_CLASS_P (op1
))
3723 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3726 /* In case of multi-step conversion, we first generate conversion operations
3727 to the intermediate types, and then from that types to the final one.
3728 We create vector destinations for the intermediate type (TYPES) received
3729 from supportable_*_operation, and store them in the correct order
3730 for future use in vect_create_vectorized_*_stmts (). */
3731 vec_dsts
.create (multi_step_cvt
+ 1);
3732 vec_dest
= vect_create_destination_var (scalar_dest
,
3733 (cvt_type
&& modifier
== WIDEN
)
3734 ? cvt_type
: vectype_out
);
3735 vec_dsts
.quick_push (vec_dest
);
3739 for (i
= interm_types
.length () - 1;
3740 interm_types
.iterate (i
, &intermediate_type
); i
--)
3742 vec_dest
= vect_create_destination_var (scalar_dest
,
3744 vec_dsts
.quick_push (vec_dest
);
3749 vec_dest
= vect_create_destination_var (scalar_dest
,
3751 ? vectype_out
: cvt_type
);
3755 if (modifier
== WIDEN
)
3757 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3758 if (op_type
== binary_op
)
3759 vec_oprnds1
.create (1);
3761 else if (modifier
== NARROW
)
3762 vec_oprnds0
.create (
3763 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3765 else if (code
== WIDEN_LSHIFT_EXPR
)
3766 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3769 prev_stmt_info
= NULL
;
3773 for (j
= 0; j
< ncopies
; j
++)
3776 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3779 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3781 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3783 /* Arguments are ready, create the new vector stmt. */
3784 if (code1
== CALL_EXPR
)
3786 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3787 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3788 gimple_call_set_lhs (new_stmt
, new_temp
);
3792 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3793 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
3795 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3796 gimple_assign_set_lhs (new_stmt
, new_temp
);
3799 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3801 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3805 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3808 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3813 /* In case the vectorization factor (VF) is bigger than the number
3814 of elements that we can fit in a vectype (nunits), we have to
3815 generate more than one vector stmt - i.e - we need to "unroll"
3816 the vector stmt by a factor VF/nunits. */
3817 for (j
= 0; j
< ncopies
; j
++)
3824 if (code
== WIDEN_LSHIFT_EXPR
)
3829 /* Store vec_oprnd1 for every vector stmt to be created
3830 for SLP_NODE. We check during the analysis that all
3831 the shift arguments are the same. */
3832 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3833 vec_oprnds1
.quick_push (vec_oprnd1
);
3835 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3839 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3840 &vec_oprnds1
, slp_node
, -1);
3844 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3845 vec_oprnds0
.quick_push (vec_oprnd0
);
3846 if (op_type
== binary_op
)
3848 if (code
== WIDEN_LSHIFT_EXPR
)
3851 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3853 vec_oprnds1
.quick_push (vec_oprnd1
);
3859 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3860 vec_oprnds0
.truncate (0);
3861 vec_oprnds0
.quick_push (vec_oprnd0
);
3862 if (op_type
== binary_op
)
3864 if (code
== WIDEN_LSHIFT_EXPR
)
3867 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3869 vec_oprnds1
.truncate (0);
3870 vec_oprnds1
.quick_push (vec_oprnd1
);
3874 /* Arguments are ready. Create the new vector stmts. */
3875 for (i
= multi_step_cvt
; i
>= 0; i
--)
3877 tree this_dest
= vec_dsts
[i
];
3878 enum tree_code c1
= code1
, c2
= code2
;
3879 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3884 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3886 stmt
, this_dest
, gsi
,
3887 c1
, c2
, decl1
, decl2
,
3891 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3895 if (codecvt1
== CALL_EXPR
)
3897 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3898 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3899 gimple_call_set_lhs (new_stmt
, new_temp
);
3903 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3904 new_temp
= make_ssa_name (vec_dest
, NULL
);
3905 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
3910 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3913 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3916 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3919 if (!prev_stmt_info
)
3920 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3922 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3923 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3928 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3932 /* In case the vectorization factor (VF) is bigger than the number
3933 of elements that we can fit in a vectype (nunits), we have to
3934 generate more than one vector stmt - i.e - we need to "unroll"
3935 the vector stmt by a factor VF/nunits. */
3936 for (j
= 0; j
< ncopies
; j
++)
3940 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3944 vec_oprnds0
.truncate (0);
3945 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3946 vect_pow2 (multi_step_cvt
) - 1);
3949 /* Arguments are ready. Create the new vector stmts. */
3951 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3953 if (codecvt1
== CALL_EXPR
)
3955 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3956 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3957 gimple_call_set_lhs (new_stmt
, new_temp
);
3961 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3962 new_temp
= make_ssa_name (vec_dest
, NULL
);
3963 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
3967 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3968 vec_oprnds0
[i
] = new_temp
;
3971 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
3972 stmt
, vec_dsts
, gsi
,
3977 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3981 vec_oprnds0
.release ();
3982 vec_oprnds1
.release ();
3983 vec_dsts
.release ();
3984 interm_types
.release ();
/* NOTE(review): this region is a damaged listing of GCC's
   tree-vect-stmts.c -- each original source line has been wrapped onto
   several physical lines, the original line numbers (e.g. "3998") are
   fused into the text, and many original lines (return statements,
   braces, the "static bool" return type, some comment terminators)
   appear to have been dropped.  The comments added below annotate the
   visible fragments only; every non-comment token is left untouched.
   TODO confirm against the pristine tree-vect-stmts.c before relying
   on any of this text.  */
3990 /* Function vectorizable_assignment.
3992 Check if STMT performs an assignment (copy) that can be vectorized.
3993 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3994 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3995 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3998 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
3999 gimple
*vec_stmt
, slp_tree slp_node
)
4004 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4005 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4006 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4010 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4011 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4014 vec
<tree
> vec_oprnds
= vNULL
;
4016 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4017 gimple new_stmt
= NULL
;
4018 stmt_vec_info prev_stmt_info
= NULL
;
4019 enum tree_code code
;
/* NCOPIES selection and early-out relevance/def-type guards follow;
   the bodies of the failing branches (presumably "return false;")
   are among the lines missing from this listing -- verify against
   the original file.  */
4022 /* Multiple types in SLP are handled by creating the appropriate number of
4023 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4025 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4028 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4030 gcc_assert (ncopies
>= 1);
4032 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4035 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4038 /* Is vectorizable assignment? */
4039 if (!is_gimple_assign (stmt
))
4042 scalar_dest
= gimple_assign_lhs (stmt
);
4043 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4046 code
= gimple_assign_rhs_code (stmt
);
4047 if (gimple_assign_single_p (stmt
)
4048 || code
== PAREN_EXPR
4049 || CONVERT_EXPR_CODE_P (code
))
4050 op
= gimple_assign_rhs1 (stmt
);
4054 if (code
== VIEW_CONVERT_EXPR
)
4055 op
= TREE_OPERAND (op
, 0);
4057 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4058 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4060 if (dump_enabled_p ())
4061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4062 "use not simple.\n");
4066 /* We can handle NOP_EXPR conversions that do not change the number
4067 of elements or the vector size. */
4068 if ((CONVERT_EXPR_CODE_P (code
)
4069 || code
== VIEW_CONVERT_EXPR
)
4071 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4072 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4073 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4076 /* We do not handle bit-precision changes. */
4077 if ((CONVERT_EXPR_CODE_P (code
)
4078 || code
== VIEW_CONVERT_EXPR
)
4079 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4080 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4081 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4082 || ((TYPE_PRECISION (TREE_TYPE (op
))
4083 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4084 /* But a conversion that does not change the bit-pattern is ok. */
4085 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4086 > TYPE_PRECISION (TREE_TYPE (op
)))
4087 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4089 if (dump_enabled_p ())
4090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4091 "type conversion to/from bit-precision "
/* Analysis-only path: when VEC_STMT is null the function records the
   stmt kind and its cost model entry, then returns without emitting
   code (the "return true;" line is not visible in this listing).  */
4096 if (!vec_stmt
) /* transformation not required. */
4098 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4099 if (dump_enabled_p ())
4100 dump_printf_loc (MSG_NOTE
, vect_location
,
4101 "=== vectorizable_assignment ===\n");
4102 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4107 if (dump_enabled_p ())
4108 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4111 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Transform: one vectorized copy per NCOPIES; copy 0 fetches defs,
   later copies chain through vect_get_vec_defs_for_stmt_copy, and
   the copies are linked via STMT_VINFO_RELATED_STMT.  */
4114 for (j
= 0; j
< ncopies
; j
++)
4118 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4120 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4122 /* Arguments are ready. create the new vector stmt. */
4123 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4125 if (CONVERT_EXPR_CODE_P (code
)
4126 || code
== VIEW_CONVERT_EXPR
)
4127 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4128 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4129 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4130 gimple_assign_set_lhs (new_stmt
, new_temp
);
4131 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4133 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4140 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4142 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4144 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4147 vec_oprnds
.release ();
/* NOTE(review): damaged listing -- original lines are wrapped across
   physical lines with the original line numbers (e.g. "4156") fused
   into the text, and several lines (return type, braces, the
   "return false;" / "return true;" bodies) are missing.  Comments
   below annotate the visible fragments only; no code token changed.  */
4152 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4153 either as shift by a scalar or by a vector. */
4156 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4159 enum machine_mode vec_mode
;
/* Probe the scalar-shift optab first, falling back to the
   vector-shift optab; CODE_FOR_nothing from both presumably means
   "unsupported" -- the failing branches' return lines are not
   visible in this listing.  */
4164 vectype
= get_vectype_for_scalar_type (scalar_type
);
4168 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4170 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4172 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4174 || (optab_handler (optab
, TYPE_MODE (vectype
))
4175 == CODE_FOR_nothing
))
4179 vec_mode
= TYPE_MODE (vectype
);
4180 icode
= (int) optab_handler (optab
, vec_mode
);
4181 if (icode
== CODE_FOR_nothing
)
/* NOTE(review): damaged listing of GCC's tree-vect-stmts.c -- each
   original line is wrapped over several physical lines with the
   original line numbers (e.g. "4196") fused in, and many lines are
   missing, including some comment terminators (the comments opened at
   fragments "4304", "4323" and "4482" never close before the next
   "*/" in the text).  Annotations below are added only where the
   surrounding comment state is verifiably closed; every non-comment
   token is left byte-identical.  Verify against the pristine file.  */
4188 /* Function vectorizable_shift.
4190 Check if STMT performs a shift operation that can be vectorized.
4191 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4192 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4193 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4196 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4197 gimple
*vec_stmt
, slp_tree slp_node
)
4201 tree op0
, op1
= NULL
;
4202 tree vec_oprnd1
= NULL_TREE
;
4203 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4205 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4206 enum tree_code code
;
4207 enum machine_mode vec_mode
;
4211 enum machine_mode optab_op2_mode
;
4214 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4215 gimple new_stmt
= NULL
;
4216 stmt_vec_info prev_stmt_info
;
4223 vec
<tree
> vec_oprnds0
= vNULL
;
4224 vec
<tree
> vec_oprnds1
= vNULL
;
4227 bool scalar_shift_arg
= true;
4228 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Early-out analysis guards: relevance, def type, assignment form,
   SSA lhs, and that CODE is one of the four shift/rotate codes.  The
   failing branches' bodies (presumably "return false;") are among
   the lines missing from this listing.  */
4231 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4234 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4237 /* Is STMT a vectorizable binary/unary operation? */
4238 if (!is_gimple_assign (stmt
))
4241 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4244 code
= gimple_assign_rhs_code (stmt
);
4246 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4247 || code
== RROTATE_EXPR
))
4250 scalar_dest
= gimple_assign_lhs (stmt
);
4251 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4252 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4253 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4255 if (dump_enabled_p ())
4256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4257 "bit-precision shifts not supported.\n");
4261 op0
= gimple_assign_rhs1 (stmt
);
4262 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4263 &def_stmt
, &def
, &dt
[0], &vectype
))
4265 if (dump_enabled_p ())
4266 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4267 "use not simple.\n");
4270 /* If op0 is an external or constant def use a vector type with
4271 the same size as the output vector type. */
4273 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4275 gcc_assert (vectype
);
4278 if (dump_enabled_p ())
4279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4280 "no vectype for scalar type\n");
4284 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4285 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4286 if (nunits_out
!= nunits_in
)
4289 op1
= gimple_assign_rhs2 (stmt
);
4290 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4291 &def
, &dt
[1], &op1_vectype
))
4293 if (dump_enabled_p ())
4294 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4295 "use not simple.\n");
4300 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4304 /* Multiple types in SLP are handled by creating the appropriate number of
4305 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4307 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4310 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4312 gcc_assert (ncopies
>= 1);
4314 /* Determine whether the shift amount is a vector, or scalar. If the
4315 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4317 if (dt
[1] == vect_internal_def
&& !slp_node
)
4318 scalar_shift_arg
= false;
4319 else if (dt
[1] == vect_constant_def
4320 || dt
[1] == vect_external_def
4321 || dt
[1] == vect_internal_def
)
4323 /* In SLP, need to check whether the shift count is the same,
4324 in loops if it is a constant or invariant, it is always
4328 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4331 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4332 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4333 scalar_shift_arg
= false;
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4340 "operand mode requires invariant argument.\n");
4344 /* Vector shifted by vector. */
4345 if (!scalar_shift_arg
)
4347 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4348 if (dump_enabled_p ())
4349 dump_printf_loc (MSG_NOTE
, vect_location
,
4350 "vector/vector shift/rotate found.\n");
4353 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4354 if (op1_vectype
== NULL_TREE
4355 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4357 if (dump_enabled_p ())
4358 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4359 "unusable type for last operand in"
4360 " vector/vector shift/rotate.\n");
4364 /* See if the machine has a vector shifted by scalar insn and if not
4365 then see if it has a vector shifted by vector insn. */
4368 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4370 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4372 if (dump_enabled_p ())
4373 dump_printf_loc (MSG_NOTE
, vect_location
,
4374 "vector/scalar shift/rotate found.\n");
4378 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4380 && (optab_handler (optab
, TYPE_MODE (vectype
))
4381 != CODE_FOR_nothing
))
4383 scalar_shift_arg
= false;
4385 if (dump_enabled_p ())
4386 dump_printf_loc (MSG_NOTE
, vect_location
,
4387 "vector/vector shift/rotate found.\n");
4389 /* Unlike the other binary operators, shifts/rotates have
4390 the rhs being int, instead of the same type as the lhs,
4391 so make sure the scalar is the right type if we are
4392 dealing with vectors of long long/long/short/char. */
4393 if (dt
[1] == vect_constant_def
)
4394 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4395 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4399 && TYPE_MODE (TREE_TYPE (vectype
))
4400 != TYPE_MODE (TREE_TYPE (op1
)))
4402 if (dump_enabled_p ())
4403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4404 "unusable type for last operand in"
4405 " vector/vector shift/rotate.\n");
4408 if (vec_stmt
&& !slp_node
)
4410 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4411 op1
= vect_init_vector (stmt
, op1
,
4412 TREE_TYPE (vectype
), NULL
);
4419 /* Supportable by target? */
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4427 vec_mode
= TYPE_MODE (vectype
);
4428 icode
= (int) optab_handler (optab
, vec_mode
);
4429 if (icode
== CODE_FOR_nothing
)
4431 if (dump_enabled_p ())
4432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4433 "op not supported by target.\n");
4434 /* Check only during analysis. */
4435 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4436 || (vf
< vect_min_worthwhile_factor (code
)
4439 if (dump_enabled_p ())
4440 dump_printf_loc (MSG_NOTE
, vect_location
,
4441 "proceeding using word mode.\n");
4444 /* Worthwhile without SIMD support? Check only during analysis. */
4445 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4446 && vf
< vect_min_worthwhile_factor (code
)
4449 if (dump_enabled_p ())
4450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4451 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record the stmt kind and cost, then return
   without emitting code (the return line is not visible here).  */
4455 if (!vec_stmt
) /* transformation not required. */
4457 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4458 if (dump_enabled_p ())
4459 dump_printf_loc (MSG_NOTE
, vect_location
,
4460 "=== vectorizable_shift ===\n");
4461 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4467 if (dump_enabled_p ())
4468 dump_printf_loc (MSG_NOTE
, vect_location
,
4469 "transform binary/unary operation.\n");
4472 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4474 prev_stmt_info
= NULL
;
/* Transform: emit NCOPIES vectorized shifts; for a scalar shift
   amount the insn's operand-2 mode decides whether op1 stays scalar
   or is splatted; copies chain via STMT_VINFO_RELATED_STMT.  */
4475 for (j
= 0; j
< ncopies
; j
++)
4480 if (scalar_shift_arg
)
4482 /* Vector shl and shr insn patterns can be defined with scalar
4483 operand 2 (shift operand). In this case, use constant or loop
4484 invariant op1 directly, without extending it to vector mode
4486 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4487 if (!VECTOR_MODE_P (optab_op2_mode
))
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE
, vect_location
,
4491 "operand 1 using scalar mode.\n");
4493 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4494 vec_oprnds1
.quick_push (vec_oprnd1
);
4497 /* Store vec_oprnd1 for every vector stmt to be created
4498 for SLP_NODE. We check during the analysis that all
4499 the shift arguments are the same.
4500 TODO: Allow different constants for different vector
4501 stmts generated for an SLP instance. */
4502 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4503 vec_oprnds1
.quick_push (vec_oprnd1
);
4508 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4509 (a special case for certain kind of vector shifts); otherwise,
4510 operand 1 should be of a vector type (the usual case). */
4512 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4515 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4519 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4521 /* Arguments are ready. Create the new vector stmt. */
4522 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4524 vop1
= vec_oprnds1
[i
];
4525 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4526 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4527 gimple_assign_set_lhs (new_stmt
, new_temp
);
4528 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4530 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4537 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4539 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4540 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4543 vec_oprnds0
.release ();
4544 vec_oprnds1
.release ();
4550 /* Function vectorizable_operation.
4552 Check if STMT performs a binary, unary or ternary operation that can
4554 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4555 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4556 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4559 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4560 gimple
*vec_stmt
, slp_tree slp_node
)
4564 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4565 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4567 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4568 enum tree_code code
;
4569 enum machine_mode vec_mode
;
4576 enum vect_def_type dt
[3]
4577 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4578 gimple new_stmt
= NULL
;
4579 stmt_vec_info prev_stmt_info
;
4585 vec
<tree
> vec_oprnds0
= vNULL
;
4586 vec
<tree
> vec_oprnds1
= vNULL
;
4587 vec
<tree
> vec_oprnds2
= vNULL
;
4588 tree vop0
, vop1
, vop2
;
4589 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4592 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4595 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4598 /* Is STMT a vectorizable binary/unary operation? */
4599 if (!is_gimple_assign (stmt
))
4602 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4605 code
= gimple_assign_rhs_code (stmt
);
4607 /* For pointer addition, we should use the normal plus for
4608 the vector addition. */
4609 if (code
== POINTER_PLUS_EXPR
)
4612 /* Support only unary or binary operations. */
4613 op_type
= TREE_CODE_LENGTH (code
);
4614 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4616 if (dump_enabled_p ())
4617 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4618 "num. args = %d (not unary/binary/ternary op).\n",
4623 scalar_dest
= gimple_assign_lhs (stmt
);
4624 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4626 /* Most operations cannot handle bit-precision types without extra
4628 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4629 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4630 /* Exception are bitwise binary operations. */
4631 && code
!= BIT_IOR_EXPR
4632 && code
!= BIT_XOR_EXPR
4633 && code
!= BIT_AND_EXPR
)
4635 if (dump_enabled_p ())
4636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4637 "bit-precision arithmetic not supported.\n");
4641 op0
= gimple_assign_rhs1 (stmt
);
4642 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4643 &def_stmt
, &def
, &dt
[0], &vectype
))
4645 if (dump_enabled_p ())
4646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4647 "use not simple.\n");
4650 /* If op0 is an external or constant def use a vector type with
4651 the same size as the output vector type. */
4653 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4655 gcc_assert (vectype
);
4658 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4661 "no vectype for scalar type ");
4662 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4664 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4670 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4671 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4672 if (nunits_out
!= nunits_in
)
4675 if (op_type
== binary_op
|| op_type
== ternary_op
)
4677 op1
= gimple_assign_rhs2 (stmt
);
4678 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4681 if (dump_enabled_p ())
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4683 "use not simple.\n");
4687 if (op_type
== ternary_op
)
4689 op2
= gimple_assign_rhs3 (stmt
);
4690 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4693 if (dump_enabled_p ())
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4695 "use not simple.\n");
4701 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4705 /* Multiple types in SLP are handled by creating the appropriate number of
4706 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4708 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4711 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4713 gcc_assert (ncopies
>= 1);
4715 /* Shifts are handled in vectorizable_shift (). */
4716 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4717 || code
== RROTATE_EXPR
)
4720 /* Supportable by target? */
4722 vec_mode
= TYPE_MODE (vectype
);
4723 if (code
== MULT_HIGHPART_EXPR
)
4725 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4726 icode
= LAST_INSN_CODE
;
4728 icode
= CODE_FOR_nothing
;
4732 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4735 if (dump_enabled_p ())
4736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4740 icode
= (int) optab_handler (optab
, vec_mode
);
4743 if (icode
== CODE_FOR_nothing
)
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4747 "op not supported by target.\n");
4748 /* Check only during analysis. */
4749 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4750 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_NOTE
, vect_location
,
4754 "proceeding using word mode.\n");
4757 /* Worthwhile without SIMD support? Check only during analysis. */
4758 if (!VECTOR_MODE_P (vec_mode
)
4760 && vf
< vect_min_worthwhile_factor (code
))
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4764 "not worthwhile without SIMD support.\n");
4768 if (!vec_stmt
) /* transformation not required. */
4770 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_NOTE
, vect_location
,
4773 "=== vectorizable_operation ===\n");
4774 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4780 if (dump_enabled_p ())
4781 dump_printf_loc (MSG_NOTE
, vect_location
,
4782 "transform binary/unary operation.\n");
4785 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4787 /* In case the vectorization factor (VF) is bigger than the number
4788 of elements that we can fit in a vectype (nunits), we have to generate
4789 more than one vector stmt - i.e - we need to "unroll" the
4790 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4791 from one copy of the vector stmt to the next, in the field
4792 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4793 stages to find the correct vector defs to be used when vectorizing
4794 stmts that use the defs of the current stmt. The example below
4795 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4796 we need to create 4 vectorized stmts):
4798 before vectorization:
4799 RELATED_STMT VEC_STMT
4803 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4805 RELATED_STMT VEC_STMT
4806 VS1_0: vx0 = memref0 VS1_1 -
4807 VS1_1: vx1 = memref1 VS1_2 -
4808 VS1_2: vx2 = memref2 VS1_3 -
4809 VS1_3: vx3 = memref3 - -
4810 S1: x = load - VS1_0
4813 step2: vectorize stmt S2 (done here):
4814 To vectorize stmt S2 we first need to find the relevant vector
4815 def for the first operand 'x'. This is, as usual, obtained from
4816 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4817 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4818 relevant vector def 'vx0'. Having found 'vx0' we can generate
4819 the vector stmt VS2_0, and as usual, record it in the
4820 STMT_VINFO_VEC_STMT of stmt S2.
4821 When creating the second copy (VS2_1), we obtain the relevant vector
4822 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4823 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4824 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4825 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4826 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4827 chain of stmts and pointers:
4828 RELATED_STMT VEC_STMT
4829 VS1_0: vx0 = memref0 VS1_1 -
4830 VS1_1: vx1 = memref1 VS1_2 -
4831 VS1_2: vx2 = memref2 VS1_3 -
4832 VS1_3: vx3 = memref3 - -
4833 S1: x = load - VS1_0
4834 VS2_0: vz0 = vx0 + v1 VS2_1 -
4835 VS2_1: vz1 = vx1 + v1 VS2_2 -
4836 VS2_2: vz2 = vx2 + v1 VS2_3 -
4837 VS2_3: vz3 = vx3 + v1 - -
4838 S2: z = x + 1 - VS2_0 */
4840 prev_stmt_info
= NULL
;
4841 for (j
= 0; j
< ncopies
; j
++)
4846 if (op_type
== binary_op
|| op_type
== ternary_op
)
4847 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4850 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4852 if (op_type
== ternary_op
)
4854 vec_oprnds2
.create (1);
4855 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4862 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4863 if (op_type
== ternary_op
)
4865 tree vec_oprnd
= vec_oprnds2
.pop ();
4866 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4871 /* Arguments are ready. Create the new vector stmt. */
4872 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4874 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4875 ? vec_oprnds1
[i
] : NULL_TREE
);
4876 vop2
= ((op_type
== ternary_op
)
4877 ? vec_oprnds2
[i
] : NULL_TREE
);
4878 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
4880 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4881 gimple_assign_set_lhs (new_stmt
, new_temp
);
4882 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4884 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4891 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4893 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4894 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4897 vec_oprnds0
.release ();
4898 vec_oprnds1
.release ();
4899 vec_oprnds2
.release ();
4904 /* A helper function to ensure data reference DR's base alignment
4908 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4913 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4915 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4916 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4918 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4919 DECL_USER_ALIGN (base_decl
) = 1;
4920 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
4925 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4926 reversal of the vector elements. If that is impossible to do,
4930 perm_mask_for_reverse (tree vectype
)
4935 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4936 sel
= XALLOCAVEC (unsigned char, nunits
);
4938 for (i
= 0; i
< nunits
; ++i
)
4939 sel
[i
] = nunits
- 1 - i
;
4941 return vect_gen_perm_mask (vectype
, sel
);
/* NOTE(review): this whole region is a lossy, line-shredded extraction of
   vectorizable_store -- each original source line is split across several
   physical lines, the embedded 4-digit numbers are the original line
   numbers, and many original lines are missing entirely (conditions,
   arguments, braces).  The code text is left byte-identical below; only
   comments are added.  Restore from the upstream tree-vect-stmts.c
   before attempting to compile.  */
4944 /* Function vectorizable_store.
4946 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4948 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4949 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4950 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4953 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4959 tree vec_oprnd
= NULL_TREE
;
4960 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4961 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4964 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4965 struct loop
*loop
= NULL
;
4966 enum machine_mode vec_mode
;
4968 enum dr_alignment_support alignment_support_scheme
;
4971 enum vect_def_type dt
;
4972 stmt_vec_info prev_stmt_info
= NULL
;
4973 tree dataref_ptr
= NULL_TREE
;
4974 tree dataref_offset
= NULL_TREE
;
4975 gimple ptr_incr
= NULL
;
4976 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4979 gimple next_stmt
, first_stmt
= NULL
;
4980 bool grouped_store
= false;
4981 bool store_lanes_p
= false;
4982 unsigned int group_size
, i
;
4983 vec
<tree
> dr_chain
= vNULL
;
4984 vec
<tree
> oprnds
= vNULL
;
4985 vec
<tree
> result_chain
= vNULL
;
4987 bool negative
= false;
4988 tree offset
= NULL_TREE
;
4989 vec
<tree
> vec_oprnds
= vNULL
;
4990 bool slp
= (slp_node
!= NULL
);
4991 unsigned int vec_num
;
4992 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4996 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4998 /* Multiple types in SLP are handled by creating the appropriate number of
4999 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5001 if (slp
|| PURE_SLP_STMT (stmt_info
))
5004 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5006 gcc_assert (ncopies
>= 1);
5008 /* FORNOW. This restriction should be relaxed. */
5009 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5011 if (dump_enabled_p ())
5012 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5013 "multiple types in nested loop.\n");
5017 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5020 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5023 /* Is vectorizable store? */
5025 if (!is_gimple_assign (stmt
))
5028 scalar_dest
= gimple_assign_lhs (stmt
);
5029 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5030 && is_pattern_stmt_p (stmt_info
))
5031 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5032 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5033 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5034 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5035 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5036 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5037 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5038 && TREE_CODE (scalar_dest
) != MEM_REF
)
5041 gcc_assert (gimple_assign_single_p (stmt
));
5042 op
= gimple_assign_rhs1 (stmt
);
5043 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5046 if (dump_enabled_p ())
5047 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5048 "use not simple.\n");
5052 elem_type
= TREE_TYPE (vectype
);
5053 vec_mode
= TYPE_MODE (vectype
);
5055 /* FORNOW. In some cases can vectorize even if data-type not supported
5056 (e.g. - array initialization with 0). */
5057 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5060 if (!STMT_VINFO_DATA_REF (stmt_info
))
5064 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5065 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5066 size_zero_node
) < 0;
5067 if (negative
&& ncopies
> 1)
5069 if (dump_enabled_p ())
5070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5071 "multiple types with negative step.\n");
5077 gcc_assert (!grouped_store
);
5078 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5079 if (alignment_support_scheme
!= dr_aligned
5080 && alignment_support_scheme
!= dr_unaligned_supported
)
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5084 "negative step but alignment required.\n");
5087 if (dt
!= vect_constant_def
5088 && dt
!= vect_external_def
5089 && !perm_mask_for_reverse (vectype
))
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5093 "negative step and reversing not supported.\n");
5098 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5100 grouped_store
= true;
5101 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5102 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5104 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5105 if (vect_store_lanes_supported (vectype
, group_size
))
5106 store_lanes_p
= true;
5107 else if (!vect_grouped_store_supported (vectype
, group_size
))
5111 if (first_stmt
== stmt
)
5113 /* STMT is the leader of the group. Check the operands of all the
5114 stmts of the group. */
5115 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5118 gcc_assert (gimple_assign_single_p (next_stmt
));
5119 op
= gimple_assign_rhs1 (next_stmt
);
5120 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5121 &def_stmt
, &def
, &dt
))
5123 if (dump_enabled_p ())
5124 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5125 "use not simple.\n");
5128 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* Analysis phase ends here: without VEC_STMT only record the stmt kind
   and its cost-model estimate -- TODO confirm against the unmangled
   source (several original lines are missing in this region).  */
5133 if (!vec_stmt
) /* transformation not required. */
5135 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5136 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5143 ensure_base_align (stmt_info
, dr
);
5147 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5148 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5150 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5153 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5155 /* We vectorize all the stmts of the interleaving group when we
5156 reach the last stmt in the group. */
5157 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5158 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5167 grouped_store
= false;
5168 /* VEC_NUM is the number of vect stmts to be created for this
5170 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5171 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5172 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5173 op
= gimple_assign_rhs1 (first_stmt
);
5176 /* VEC_NUM is the number of vect stmts to be created for this
5178 vec_num
= group_size
;
5184 group_size
= vec_num
= 1;
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_NOTE
, vect_location
,
5189 "transform store. ncopies = %d\n", ncopies
);
5191 dr_chain
.create (group_size
);
5192 oprnds
.create (group_size
);
5194 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5195 gcc_assert (alignment_support_scheme
);
5196 /* Targets with store-lane instructions must not require explicit
5198 gcc_assert (!store_lanes_p
5199 || alignment_support_scheme
== dr_aligned
5200 || alignment_support_scheme
== dr_unaligned_supported
);
5203 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5206 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5208 aggr_type
= vectype
;
5210 /* In case the vectorization factor (VF) is bigger than the number
5211 of elements that we can fit in a vectype (nunits), we have to generate
5212 more than one vector stmt - i.e - we need to "unroll" the
5213 vector stmt by a factor VF/nunits. For more details see documentation in
5214 vect_get_vec_def_for_copy_stmt. */
5216 /* In case of interleaving (non-unit grouped access):
5223 We create vectorized stores starting from base address (the access of the
5224 first stmt in the chain (S2 in the above example), when the last store stmt
5225 of the chain (S4) is reached:
5228 VS2: &base + vec_size*1 = vx0
5229 VS3: &base + vec_size*2 = vx1
5230 VS4: &base + vec_size*3 = vx3
5232 Then permutation statements are generated:
5234 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5235 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5238 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5239 (the order of the data-refs in the output of vect_permute_store_chain
5240 corresponds to the order of scalar stmts in the interleaving chain - see
5241 the documentation of vect_permute_store_chain()).
5243 In case of both multiple types and interleaving, above vector stores and
5244 permutation stmts are created for every copy. The result vector stmts are
5245 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5246 STMT_VINFO_RELATED_STMT for the next copies.
/* Transformation loop: presumably generates NCOPIES vectorized stores,
   chaining copies via STMT_VINFO_RELATED_STMT -- several original lines
   are missing in this region; verify against upstream.  */
5249 prev_stmt_info
= NULL
;
5250 for (j
= 0; j
< ncopies
; j
++)
5258 /* Get vectorized arguments for SLP_NODE. */
5259 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5260 NULL
, slp_node
, -1);
5262 vec_oprnd
= vec_oprnds
[0];
5266 /* For interleaved stores we collect vectorized defs for all the
5267 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5268 used as an input to vect_permute_store_chain(), and OPRNDS as
5269 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5271 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5272 OPRNDS are of size 1. */
5273 next_stmt
= first_stmt
;
5274 for (i
= 0; i
< group_size
; i
++)
5276 /* Since gaps are not supported for interleaved stores,
5277 GROUP_SIZE is the exact number of stmts in the chain.
5278 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5279 there is no interleaving, GROUP_SIZE is 1, and only one
5280 iteration of the loop will be executed. */
5281 gcc_assert (next_stmt
5282 && gimple_assign_single_p (next_stmt
));
5283 op
= gimple_assign_rhs1 (next_stmt
);
5285 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5287 dr_chain
.quick_push (vec_oprnd
);
5288 oprnds
.quick_push (vec_oprnd
);
5289 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5293 /* We should have catched mismatched types earlier. */
5294 gcc_assert (useless_type_conversion_p (vectype
,
5295 TREE_TYPE (vec_oprnd
)));
5296 bool simd_lane_access_p
5297 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5298 if (simd_lane_access_p
5299 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5300 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5301 && integer_zerop (DR_OFFSET (first_dr
))
5302 && integer_zerop (DR_INIT (first_dr
))
5303 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5304 get_alias_set (DR_REF (first_dr
))))
5306 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5307 dataref_offset
= build_int_cst (reference_alias_ptr_type
5308 (DR_REF (first_dr
)), 0);
5313 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5314 simd_lane_access_p
? loop
: NULL
,
5315 offset
, &dummy
, gsi
, &ptr_incr
,
5316 simd_lane_access_p
, &inv_p
);
5317 gcc_assert (bb_vinfo
|| !inv_p
);
5321 /* For interleaved stores we created vectorized defs for all the
5322 defs stored in OPRNDS in the previous iteration (previous copy).
5323 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5324 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5326 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5327 OPRNDS are of size 1. */
5328 for (i
= 0; i
< group_size
; i
++)
5331 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5333 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5334 dr_chain
[i
] = vec_oprnd
;
5335 oprnds
[i
] = vec_oprnd
;
5339 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5340 TYPE_SIZE_UNIT (aggr_type
));
5342 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5343 TYPE_SIZE_UNIT (aggr_type
));
5350 /* Combine all the vectors into an array. */
5351 vec_array
= create_vector_array (vectype
, vec_num
);
5352 for (i
= 0; i
< vec_num
; i
++)
5354 vec_oprnd
= dr_chain
[i
];
5355 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5359 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5360 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5361 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5362 gimple_call_set_lhs (new_stmt
, data_ref
);
5363 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5371 result_chain
.create (group_size
);
5373 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5377 next_stmt
= first_stmt
;
5378 for (i
= 0; i
< vec_num
; i
++)
5380 unsigned align
, misalign
;
5383 /* Bump the vector pointer. */
5384 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5388 vec_oprnd
= vec_oprnds
[i
];
5389 else if (grouped_store
)
5390 /* For grouped stores vectorized defs are interleaved in
5391 vect_permute_store_chain(). */
5392 vec_oprnd
= result_chain
[i
];
5394 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5397 : build_int_cst (reference_alias_ptr_type
5398 (DR_REF (first_dr
)), 0));
5399 align
= TYPE_ALIGN_UNIT (vectype
);
5400 if (aligned_access_p (first_dr
))
5402 else if (DR_MISALIGNMENT (first_dr
) == -1)
5404 TREE_TYPE (data_ref
)
5405 = build_aligned_type (TREE_TYPE (data_ref
),
5406 TYPE_ALIGN (elem_type
));
5407 align
= TYPE_ALIGN_UNIT (elem_type
);
5412 TREE_TYPE (data_ref
)
5413 = build_aligned_type (TREE_TYPE (data_ref
),
5414 TYPE_ALIGN (elem_type
));
5415 misalign
= DR_MISALIGNMENT (first_dr
);
5417 if (dataref_offset
== NULL_TREE
)
5418 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5422 && dt
!= vect_constant_def
5423 && dt
!= vect_external_def
)
5425 tree perm_mask
= perm_mask_for_reverse (vectype
);
5427 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5429 tree new_temp
= make_ssa_name (perm_dest
, NULL
);
5431 /* Generate the permute statement. */
5433 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, new_temp
,
5434 vec_oprnd
, vec_oprnd
,
5436 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5438 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5439 vec_oprnd
= new_temp
;
5442 /* Arguments are ready. Create the new vector stmt. */
5443 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5444 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5449 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5457 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5459 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5460 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5464 dr_chain
.release ();
5466 result_chain
.release ();
5467 vec_oprnds
.release ();
5472 /* Given a vector type VECTYPE and permutation SEL returns
5473 the VECTOR_CST mask that implements the permutation of the
5474 vector elements. If that is impossible to do, returns NULL. */
5477 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
5479 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5482 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5484 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5487 mask_elt_type
= lang_hooks
.types
.type_for_mode
5488 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5489 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5491 mask_elts
= XALLOCAVEC (tree
, nunits
);
5492 for (i
= nunits
- 1; i
>= 0; i
--)
5493 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5494 mask_vec
= build_vector (mask_type
, mask_elts
);
5499 /* Given a vector variable X and Y, that was generated for the scalar
5500 STMT, generate instructions to permute the vector elements of X and Y
5501 using permutation mask MASK_VEC, insert them at *GSI and return the
5502 permuted vector variable. */
5505 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5506 gimple_stmt_iterator
*gsi
)
5508 tree vectype
= TREE_TYPE (x
);
5509 tree perm_dest
, data_ref
;
5512 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5513 data_ref
= make_ssa_name (perm_dest
, NULL
);
5515 /* Generate the permute statement. */
5516 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5518 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5523 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5524 inserting them on the loops preheader edge. Returns true if we
5525 were successful in doing so (and thus STMT can be moved then),
5526 otherwise returns false. */
5529 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5535 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5537 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5538 if (!gimple_nop_p (def_stmt
)
5539 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5541 /* Make sure we don't need to recurse. While we could do
5542 so in simple cases when there are more complex use webs
5543 we don't have an easy way to preserve stmt order to fulfil
5544 dependencies within them. */
5547 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5549 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5551 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5552 if (!gimple_nop_p (def_stmt2
)
5553 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5563 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5565 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5566 if (!gimple_nop_p (def_stmt
)
5567 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5569 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5570 gsi_remove (&gsi
, false);
5571 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5578 /* vectorizable_load.
5580 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5582 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5583 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5584 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5587 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5588 slp_tree slp_node
, slp_instance slp_node_instance
)
5591 tree vec_dest
= NULL
;
5592 tree data_ref
= NULL
;
5593 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5594 stmt_vec_info prev_stmt_info
;
5595 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5596 struct loop
*loop
= NULL
;
5597 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5598 bool nested_in_vect_loop
= false;
5599 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5600 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5603 enum machine_mode mode
;
5604 gimple new_stmt
= NULL
;
5606 enum dr_alignment_support alignment_support_scheme
;
5607 tree dataref_ptr
= NULL_TREE
;
5608 tree dataref_offset
= NULL_TREE
;
5609 gimple ptr_incr
= NULL
;
5610 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5612 int i
, j
, group_size
, group_gap
;
5613 tree msq
= NULL_TREE
, lsq
;
5614 tree offset
= NULL_TREE
;
5615 tree byte_offset
= NULL_TREE
;
5616 tree realignment_token
= NULL_TREE
;
5618 vec
<tree
> dr_chain
= vNULL
;
5619 bool grouped_load
= false;
5620 bool load_lanes_p
= false;
5623 bool negative
= false;
5624 bool compute_in_loop
= false;
5625 struct loop
*at_loop
;
5627 bool slp
= (slp_node
!= NULL
);
5628 bool slp_perm
= false;
5629 enum tree_code code
;
5630 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5633 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5634 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5635 int gather_scale
= 1;
5636 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5640 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5641 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5642 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5647 /* Multiple types in SLP are handled by creating the appropriate number of
5648 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5650 if (slp
|| PURE_SLP_STMT (stmt_info
))
5653 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5655 gcc_assert (ncopies
>= 1);
5657 /* FORNOW. This restriction should be relaxed. */
5658 if (nested_in_vect_loop
&& ncopies
> 1)
5660 if (dump_enabled_p ())
5661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5662 "multiple types in nested loop.\n");
5666 /* Invalidate assumptions made by dependence analysis when vectorization
5667 on the unrolled body effectively re-orders stmts. */
5669 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5670 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5671 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5675 "cannot perform implicit CSE when unrolling "
5676 "with negative dependence distance\n");
5680 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5683 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5686 /* Is vectorizable load? */
5687 if (!is_gimple_assign (stmt
))
5690 scalar_dest
= gimple_assign_lhs (stmt
);
5691 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5694 code
= gimple_assign_rhs_code (stmt
);
5695 if (code
!= ARRAY_REF
5696 && code
!= BIT_FIELD_REF
5697 && code
!= INDIRECT_REF
5698 && code
!= COMPONENT_REF
5699 && code
!= IMAGPART_EXPR
5700 && code
!= REALPART_EXPR
5702 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5705 if (!STMT_VINFO_DATA_REF (stmt_info
))
5708 elem_type
= TREE_TYPE (vectype
);
5709 mode
= TYPE_MODE (vectype
);
5711 /* FORNOW. In some cases can vectorize even if data-type not supported
5712 (e.g. - data copies). */
5713 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5715 if (dump_enabled_p ())
5716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5717 "Aligned load, but unsupported type.\n");
5721 /* Check if the load is a part of an interleaving chain. */
5722 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5724 grouped_load
= true;
5726 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5728 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5729 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5731 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5732 if (vect_load_lanes_supported (vectype
, group_size
))
5733 load_lanes_p
= true;
5734 else if (!vect_grouped_load_supported (vectype
, group_size
))
5738 /* Invalidate assumptions made by dependence analysis when vectorization
5739 on the unrolled body effectively re-orders stmts. */
5740 if (!PURE_SLP_STMT (stmt_info
)
5741 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5742 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5743 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5745 if (dump_enabled_p ())
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5747 "cannot perform implicit CSE when performing "
5748 "group loads with negative dependence distance\n");
5754 if (STMT_VINFO_GATHER_P (stmt_info
))
5758 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5759 &gather_off
, &gather_scale
);
5760 gcc_assert (gather_decl
);
5761 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5762 &def_stmt
, &def
, &gather_dt
,
5763 &gather_off_vectype
))
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5767 "gather index use not simple.\n");
5771 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5775 negative
= tree_int_cst_compare (nested_in_vect_loop
5776 ? STMT_VINFO_DR_STEP (stmt_info
)
5778 size_zero_node
) < 0;
5779 if (negative
&& ncopies
> 1)
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5783 "multiple types with negative step.\n");
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5793 "negative step for group load not supported"
5797 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5798 if (alignment_support_scheme
!= dr_aligned
5799 && alignment_support_scheme
!= dr_unaligned_supported
)
5801 if (dump_enabled_p ())
5802 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5803 "negative step but alignment required.\n");
5806 if (!perm_mask_for_reverse (vectype
))
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5810 "negative step and reversing not supported."
5817 if (!vec_stmt
) /* transformation not required. */
5819 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
5820 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE
, vect_location
,
5826 "transform load. ncopies = %d\n", ncopies
);
5830 ensure_base_align (stmt_info
, dr
);
5832 if (STMT_VINFO_GATHER_P (stmt_info
))
5834 tree vec_oprnd0
= NULL_TREE
, op
;
5835 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
5836 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5837 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
5838 edge pe
= loop_preheader_edge (loop
);
5841 enum { NARROW
, NONE
, WIDEN
} modifier
;
5842 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
5844 if (nunits
== gather_off_nunits
)
5846 else if (nunits
== gather_off_nunits
/ 2)
5848 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
5851 for (i
= 0; i
< gather_off_nunits
; ++i
)
5852 sel
[i
] = i
| nunits
;
5854 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
5855 gcc_assert (perm_mask
!= NULL_TREE
);
5857 else if (nunits
== gather_off_nunits
* 2)
5859 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5862 for (i
= 0; i
< nunits
; ++i
)
5863 sel
[i
] = i
< gather_off_nunits
5864 ? i
: i
+ nunits
- gather_off_nunits
;
5866 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
5867 gcc_assert (perm_mask
!= NULL_TREE
);
5873 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
5874 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5875 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5876 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5877 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5878 scaletype
= TREE_VALUE (arglist
);
5879 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
5881 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5883 ptr
= fold_convert (ptrtype
, gather_base
);
5884 if (!is_gimple_min_invariant (ptr
))
5886 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5887 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5888 gcc_assert (!new_bb
);
5891 /* Currently we support only unconditional gather loads,
5892 so mask should be all ones. */
5893 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
5894 mask
= build_int_cst (masktype
, -1);
5895 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
5897 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
5898 mask
= build_vector_from_val (masktype
, mask
);
5899 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5901 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
5905 for (j
= 0; j
< 6; ++j
)
5907 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
5908 mask
= build_real (TREE_TYPE (masktype
), r
);
5909 mask
= build_vector_from_val (masktype
, mask
);
5910 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5915 scale
= build_int_cst (scaletype
, gather_scale
);
5917 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
5918 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
5919 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
5923 for (j
= 0; j
< 6; ++j
)
5925 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
5926 merge
= build_real (TREE_TYPE (rettype
), r
);
5930 merge
= build_vector_from_val (rettype
, merge
);
5931 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
5933 prev_stmt_info
= NULL
;
5934 for (j
= 0; j
< ncopies
; ++j
)
5936 if (modifier
== WIDEN
&& (j
& 1))
5937 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
5938 perm_mask
, stmt
, gsi
);
5941 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
5944 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
5946 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5948 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5949 == TYPE_VECTOR_SUBPARTS (idxtype
));
5950 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5951 var
= make_ssa_name (var
, NULL
);
5952 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5954 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
5956 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5961 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
5963 if (!useless_type_conversion_p (vectype
, rettype
))
5965 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
5966 == TYPE_VECTOR_SUBPARTS (rettype
));
5967 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
5968 op
= make_ssa_name (var
, new_stmt
);
5969 gimple_call_set_lhs (new_stmt
, op
);
5970 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5971 var
= make_ssa_name (vec_dest
, NULL
);
5972 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
5974 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
5979 var
= make_ssa_name (vec_dest
, new_stmt
);
5980 gimple_call_set_lhs (new_stmt
, var
);
5983 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5985 if (modifier
== NARROW
)
5992 var
= permute_vec_elements (prev_res
, var
,
5993 perm_mask
, stmt
, gsi
);
5994 new_stmt
= SSA_NAME_DEF_STMT (var
);
5997 if (prev_stmt_info
== NULL
)
5998 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6000 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6001 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6005 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
6007 gimple_stmt_iterator incr_gsi
;
6013 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6014 gimple_seq stmts
= NULL
;
6015 tree stride_base
, stride_step
, alias_off
;
6017 gcc_assert (!nested_in_vect_loop
);
6020 = fold_build_pointer_plus
6021 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6022 size_binop (PLUS_EXPR
,
6023 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6024 convert_to_ptrofftype (DR_INIT (dr
))));
6025 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6027 /* For a load with loop-invariant (but other than power-of-2)
6028 stride (i.e. not a grouped access) like so:
6030 for (i = 0; i < n; i += stride)
6033 we generate a new induction variable and new accesses to
6034 form a new vector (or vectors, depending on ncopies):
6036 for (j = 0; ; j += VF*stride)
6038 tmp2 = array[j + stride];
6040 vectemp = {tmp1, tmp2, ...}
6043 ivstep
= stride_step
;
6044 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6045 build_int_cst (TREE_TYPE (ivstep
), vf
));
6047 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6049 create_iv (stride_base
, ivstep
, NULL
,
6050 loop
, &incr_gsi
, insert_after
,
6052 incr
= gsi_stmt (incr_gsi
);
6053 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6055 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6057 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6059 prev_stmt_info
= NULL
;
6060 running_off
= offvar
;
6061 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6062 for (j
= 0; j
< ncopies
; j
++)
6066 vec_alloc (v
, nunits
);
6067 for (i
= 0; i
< nunits
; i
++)
6069 tree newref
, newoff
;
6071 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
6072 running_off
, alias_off
);
6074 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6077 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6078 newoff
= copy_ssa_name (running_off
, NULL
);
6079 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
6080 running_off
, stride_step
);
6081 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6083 running_off
= newoff
;
6086 vec_inv
= build_constructor (vectype
, v
);
6087 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6088 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6091 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6093 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6094 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6101 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6103 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6104 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6105 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6107 /* Check if the chain of loads is already vectorized. */
6108 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6109 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6110 ??? But we can only do so if there is exactly one
6111 as we have no way to get at the rest. Leave the CSE
6113 ??? With the group load eventually participating
6114 in multiple different permutations (having multiple
6115 slp nodes which refer to the same group) the CSE
6116 is even wrong code. See PR56270. */
6119 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6122 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6123 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6125 /* VEC_NUM is the number of vect stmts to be created for this group. */
6128 grouped_load
= false;
6129 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6130 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6132 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6136 vec_num
= group_size
;
6144 group_size
= vec_num
= 1;
6148 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6149 gcc_assert (alignment_support_scheme
);
6150 /* Targets with load-lane instructions must not require explicit
6152 gcc_assert (!load_lanes_p
6153 || alignment_support_scheme
== dr_aligned
6154 || alignment_support_scheme
== dr_unaligned_supported
);
6156 /* In case the vectorization factor (VF) is bigger than the number
6157 of elements that we can fit in a vectype (nunits), we have to generate
6158 more than one vector stmt - i.e - we need to "unroll" the
6159 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6160 from one copy of the vector stmt to the next, in the field
6161 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6162 stages to find the correct vector defs to be used when vectorizing
6163 stmts that use the defs of the current stmt. The example below
6164 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6165 need to create 4 vectorized stmts):
6167 before vectorization:
6168 RELATED_STMT VEC_STMT
6172 step 1: vectorize stmt S1:
6173 We first create the vector stmt VS1_0, and, as usual, record a
6174 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6175 Next, we create the vector stmt VS1_1, and record a pointer to
6176 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6177 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6179 RELATED_STMT VEC_STMT
6180 VS1_0: vx0 = memref0 VS1_1 -
6181 VS1_1: vx1 = memref1 VS1_2 -
6182 VS1_2: vx2 = memref2 VS1_3 -
6183 VS1_3: vx3 = memref3 - -
6184 S1: x = load - VS1_0
6187 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6188 information we recorded in RELATED_STMT field is used to vectorize
6191 /* In case of interleaving (non-unit grouped access):
6198 Vectorized loads are created in the order of memory accesses
6199 starting from the access of the first stmt of the chain:
6202 VS2: vx1 = &base + vec_size*1
6203 VS3: vx3 = &base + vec_size*2
6204 VS4: vx4 = &base + vec_size*3
6206 Then permutation statements are generated:
6208 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6209 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6212 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6213 (the order of the data-refs in the output of vect_permute_load_chain
6214 corresponds to the order of scalar stmts in the interleaving chain - see
6215 the documentation of vect_permute_load_chain()).
6216 The generation of permutation stmts and recording them in
6217 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6219 In case of both multiple types and interleaving, the vector loads and
6220 permutation stmts above are created for every copy. The result vector
6221 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6222 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6224 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6225 on a target that supports unaligned accesses (dr_unaligned_supported)
6226 we generate the following code:
6230 p = p + indx * vectype_size;
6235 Otherwise, the data reference is potentially unaligned on a target that
6236 does not support unaligned accesses (dr_explicit_realign_optimized) -
6237 then generate the following code, in which the data in each iteration is
6238 obtained by two vector loads, one from the previous iteration, and one
6239 from the current iteration:
6241 msq_init = *(floor(p1))
6242 p2 = initial_addr + VS - 1;
6243 realignment_token = call target_builtin;
6246 p2 = p2 + indx * vectype_size
6248 vec_dest = realign_load (msq, lsq, realignment_token)
6253 /* If the misalignment remains the same throughout the execution of the
6254 loop, we can create the init_addr and permutation mask at the loop
6255 preheader. Otherwise, it needs to be created inside the loop.
6256 This can only occur when vectorizing memory accesses in the inner-loop
6257 nested within an outer-loop that is being vectorized. */
6259 if (nested_in_vect_loop
6260 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6261 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6263 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6264 compute_in_loop
= true;
6267 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6268 || alignment_support_scheme
== dr_explicit_realign
)
6269 && !compute_in_loop
)
6271 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6272 alignment_support_scheme
, NULL_TREE
,
6274 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6276 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6277 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6285 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6288 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6290 aggr_type
= vectype
;
6292 prev_stmt_info
= NULL
;
6293 for (j
= 0; j
< ncopies
; j
++)
6295 /* 1. Create the vector or array pointer update chain. */
6298 bool simd_lane_access_p
6299 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6300 if (simd_lane_access_p
6301 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6302 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6303 && integer_zerop (DR_OFFSET (first_dr
))
6304 && integer_zerop (DR_INIT (first_dr
))
6305 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6306 get_alias_set (DR_REF (first_dr
)))
6307 && (alignment_support_scheme
== dr_aligned
6308 || alignment_support_scheme
== dr_unaligned_supported
))
6310 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6311 dataref_offset
= build_int_cst (reference_alias_ptr_type
6312 (DR_REF (first_dr
)), 0);
6317 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6318 offset
, &dummy
, gsi
, &ptr_incr
,
6319 simd_lane_access_p
, &inv_p
,
6322 else if (dataref_offset
)
6323 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6324 TYPE_SIZE_UNIT (aggr_type
));
6326 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6327 TYPE_SIZE_UNIT (aggr_type
));
6329 if (grouped_load
|| slp_perm
)
6330 dr_chain
.create (vec_num
);
6336 vec_array
= create_vector_array (vectype
, vec_num
);
6339 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6340 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6341 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6342 gimple_call_set_lhs (new_stmt
, vec_array
);
6343 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6345 /* Extract each vector into an SSA_NAME. */
6346 for (i
= 0; i
< vec_num
; i
++)
6348 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6350 dr_chain
.quick_push (new_temp
);
6353 /* Record the mapping between SSA_NAMEs and statements. */
6354 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6358 for (i
= 0; i
< vec_num
; i
++)
6361 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6364 /* 2. Create the vector-load in the loop. */
6365 switch (alignment_support_scheme
)
6368 case dr_unaligned_supported
:
6370 unsigned int align
, misalign
;
6373 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6376 : build_int_cst (reference_alias_ptr_type
6377 (DR_REF (first_dr
)), 0));
6378 align
= TYPE_ALIGN_UNIT (vectype
);
6379 if (alignment_support_scheme
== dr_aligned
)
6381 gcc_assert (aligned_access_p (first_dr
));
6384 else if (DR_MISALIGNMENT (first_dr
) == -1)
6386 TREE_TYPE (data_ref
)
6387 = build_aligned_type (TREE_TYPE (data_ref
),
6388 TYPE_ALIGN (elem_type
));
6389 align
= TYPE_ALIGN_UNIT (elem_type
);
6394 TREE_TYPE (data_ref
)
6395 = build_aligned_type (TREE_TYPE (data_ref
),
6396 TYPE_ALIGN (elem_type
));
6397 misalign
= DR_MISALIGNMENT (first_dr
);
6399 if (dataref_offset
== NULL_TREE
)
6400 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6404 case dr_explicit_realign
:
6409 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6411 if (compute_in_loop
)
6412 msq
= vect_setup_realignment (first_stmt
, gsi
,
6414 dr_explicit_realign
,
6417 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
6418 new_stmt
= gimple_build_assign_with_ops
6419 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
6421 (TREE_TYPE (dataref_ptr
),
6422 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6423 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6425 = build2 (MEM_REF
, vectype
, ptr
,
6426 build_int_cst (reference_alias_ptr_type
6427 (DR_REF (first_dr
)), 0));
6428 vec_dest
= vect_create_destination_var (scalar_dest
,
6430 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6431 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6432 gimple_assign_set_lhs (new_stmt
, new_temp
);
6433 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6434 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6435 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6438 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6439 TYPE_SIZE_UNIT (elem_type
));
6440 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6441 new_stmt
= gimple_build_assign_with_ops
6442 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
6445 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6446 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6447 gimple_assign_set_lhs (new_stmt
, ptr
);
6448 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6450 = build2 (MEM_REF
, vectype
, ptr
,
6451 build_int_cst (reference_alias_ptr_type
6452 (DR_REF (first_dr
)), 0));
6455 case dr_explicit_realign_optimized
:
6456 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
6457 new_stmt
= gimple_build_assign_with_ops
6458 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
6460 (TREE_TYPE (dataref_ptr
),
6461 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6462 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6464 = build2 (MEM_REF
, vectype
, new_temp
,
6465 build_int_cst (reference_alias_ptr_type
6466 (DR_REF (first_dr
)), 0));
6471 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6472 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6473 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6474 gimple_assign_set_lhs (new_stmt
, new_temp
);
6475 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6477 /* 3. Handle explicit realignment if necessary/supported.
6479 vec_dest = realign_load (msq, lsq, realignment_token) */
6480 if (alignment_support_scheme
== dr_explicit_realign_optimized
6481 || alignment_support_scheme
== dr_explicit_realign
)
6483 lsq
= gimple_assign_lhs (new_stmt
);
6484 if (!realignment_token
)
6485 realignment_token
= dataref_ptr
;
6486 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6488 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
6491 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6492 gimple_assign_set_lhs (new_stmt
, new_temp
);
6493 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6495 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6498 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6499 add_phi_arg (phi
, lsq
,
6500 loop_latch_edge (containing_loop
),
6506 /* 4. Handle invariant-load. */
6507 if (inv_p
&& !bb_vinfo
)
6509 gcc_assert (!grouped_load
);
6510 /* If we have versioned for aliasing or the loop doesn't
6511 have any data dependencies that would preclude this,
6512 then we are sure this is a loop invariant load and
6513 thus we can insert it on the preheader edge. */
6514 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6515 && !nested_in_vect_loop
6516 && hoist_defs_of_uses (stmt
, loop
))
6518 if (dump_enabled_p ())
6520 dump_printf_loc (MSG_NOTE
, vect_location
,
6521 "hoisting out of the vectorized "
6523 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6524 dump_printf (MSG_NOTE
, "\n");
6526 tree tem
= copy_ssa_name (scalar_dest
, NULL
);
6527 gsi_insert_on_edge_immediate
6528 (loop_preheader_edge (loop
),
6529 gimple_build_assign (tem
,
6531 (gimple_assign_rhs1 (stmt
))));
6532 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6536 gimple_stmt_iterator gsi2
= *gsi
;
6538 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6541 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6542 set_vinfo_for_stmt (new_stmt
,
6543 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6549 tree perm_mask
= perm_mask_for_reverse (vectype
);
6550 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6551 perm_mask
, stmt
, gsi
);
6552 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6555 /* Collect vector loads and later create their permutation in
6556 vect_transform_grouped_load (). */
6557 if (grouped_load
|| slp_perm
)
6558 dr_chain
.quick_push (new_temp
);
6560 /* Store vector loads in the corresponding SLP_NODE. */
6561 if (slp
&& !slp_perm
)
6562 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6564 /* Bump the vector pointer to account for a gap. */
6565 if (slp
&& group_gap
!= 0)
6567 tree bump
= size_binop (MULT_EXPR
,
6568 TYPE_SIZE_UNIT (elem_type
),
6569 size_int (group_gap
));
6570 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6575 if (slp
&& !slp_perm
)
6580 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6581 slp_node_instance
, false))
6583 dr_chain
.release ();
6592 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6593 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6598 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6600 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6601 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6604 dr_chain
.release ();
6610 /* Function vect_is_simple_cond.
6613 LOOP - the loop that is being vectorized.
6614 COND - Condition that is checked for simple use.
6617 *COMP_VECTYPE - the vector type for the comparison.
6619 Returns whether a COND can be vectorized. Checks whether
6620 condition operands are supportable using vec_is_simple_use. */
/* Predicate: returns whether COND can be vectorized as a simple
   comparison (per the header comment above: operands are checked with
   vect_is_simple_use_1; *COMP_VECTYPE receives the comparison's vector
   type).
   NOTE(review): this file is an extraction with dropped lines (braces,
   `return` statements, and the declarations of LHS/RHS/DEF are not
   visible here); the comments below describe only the surviving
   tokens.  */
6623 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6624 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6628 enum vect_def_type dt
;
6629 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* COND must be a comparison tree code (tcc_comparison class);
   the dropped line presumably returns false otherwise.  */
6631 if (!COMPARISON_CLASS_P (cond
))
6634 lhs
= TREE_OPERAND (cond
, 0);
6635 rhs
= TREE_OPERAND (cond
, 1);
/* LHS is acceptable if it is an SSA name whose definition is a simple
   vectorizable use (its vector type, if any, is stored in VECTYPE1)...  */
6637 if (TREE_CODE (lhs
) == SSA_NAME
)
6639 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6640 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6641 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
/* ... or a scalar constant: integer, real, or fixed-point.  */
6644 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6645 && TREE_CODE (lhs
) != FIXED_CST
)
/* Same two-way check for RHS, recording its vector type in VECTYPE2.  */
6648 if (TREE_CODE (rhs
) == SSA_NAME
)
6650 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6651 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6652 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6655 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6656 && TREE_CODE (rhs
) != FIXED_CST
)
/* Report whichever operand supplied a vector type (constant operands
   leave theirs NULL_TREE, so prefer VECTYPE1 when both exist).  */
6659 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6663 /* vectorizable_condition.
6665 Check if STMT is conditional modify expression that can be vectorized.
6666 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6667 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6670 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6671 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6672 else clause if it is 2).
6674 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether STMT (a COND_EXPR assignment) is vectorizable and, when
   VEC_STMT is non-null, emit the vectorized VEC_COND_EXPR statements
   (see the header comment above for REDUC_DEF / REDUC_INDEX semantics).
   NOTE(review): extraction dropped lines throughout this function
   (braces, `return` statements, several declarations such as NCOPIES,
   SLP_NODE, GTEMP, VEC_CMP_TYPE, I and J); comments below are grounded
   only in the tokens that survived.  */
6677 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6678 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
/* Scalar/vector destinations and the three COND_EXPR operands.  */
6681 tree scalar_dest
= NULL_TREE
;
6682 tree vec_dest
= NULL_TREE
;
6683 tree cond_expr
, then_clause
, else_clause
;
6684 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6685 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6686 tree comp_vectype
= NULL_TREE
;
/* Vectorized versions of the comparison operands and clauses.  */
6687 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6688 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6689 tree vec_compare
, vec_cond_expr
;
6691 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* DTS caches the def-type of each of the four operands.  */
6693 enum vect_def_type dt
, dts
[4];
6694 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6696 enum tree_code code
;
6697 stmt_vec_info prev_stmt_info
= NULL
;
6699 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Per-copy vector defs for: cond lhs, cond rhs, then, else.  */
6700 vec
<tree
> vec_oprnds0
= vNULL
;
6701 vec
<tree
> vec_oprnds1
= vNULL
;
6702 vec
<tree
> vec_oprnds2
= vNULL
;
6703 vec
<tree
> vec_oprnds3
= vNULL
;
/* NCOPIES selection: presumably 1 for SLP, else VF / nunits —
   the assignment for the SLP branch was dropped by extraction.  */
6706 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6709 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6711 gcc_assert (ncopies
>= 1);
/* Multiple copies in a reduction are not handled yet.  */
6712 if (reduc_index
&& ncopies
> 1)
6713 return false; /* FORNOW */
6715 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6718 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6721 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6722 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6726 /* FORNOW: not yet supported. */
6727 if (STMT_VINFO_LIVE_P (stmt_info
))
6729 if (dump_enabled_p ())
6730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6731 "value used after loop.\n");
6735 /* Is vectorizable conditional operation? */
6736 if (!is_gimple_assign (stmt
))
6739 code
= gimple_assign_rhs_code (stmt
);
/* Only COND_EXPR assignments are handled here.  */
6741 if (code
!= COND_EXPR
)
6744 cond_expr
= gimple_assign_rhs1 (stmt
);
6745 then_clause
= gimple_assign_rhs2 (stmt
);
6746 else_clause
= gimple_assign_rhs3 (stmt
);
/* The condition must be a simple comparison (see vect_is_simple_cond);
   presumably COMP_VECTYPE is the dropped last argument.  */
6748 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
/* THEN clause: SSA name with a simple use, or a scalar constant.  */
6753 if (TREE_CODE (then_clause
) == SSA_NAME
)
6755 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6756 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6757 &then_def_stmt
, &def
, &dt
))
6760 else if (TREE_CODE (then_clause
) != INTEGER_CST
6761 && TREE_CODE (then_clause
) != REAL_CST
6762 && TREE_CODE (then_clause
) != FIXED_CST
)
/* ELSE clause: same check as THEN.  */
6765 if (TREE_CODE (else_clause
) == SSA_NAME
)
6767 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6768 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6769 &else_def_stmt
, &def
, &dt
))
6772 else if (TREE_CODE (else_clause
) != INTEGER_CST
6773 && TREE_CODE (else_clause
) != REAL_CST
6774 && TREE_CODE (else_clause
) != FIXED_CST
)
/* Build a same-width signed integer vector type for the comparison
   result; bail out (dropped line) if the target has none.  */
6777 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
6778 /* The result of a vector comparison should be signed type. */
6779 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
6780 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
6781 if (vec_cmp_type
== NULL_TREE
)
/* Analysis-only path: record the stmt kind and ask the target whether
   a VEC_COND_EXPR of these types is expandable.  */
6786 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6787 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
/* Transformation starts here: operand vectors for the four defs.  */
6794 vec_oprnds0
.create (1);
6795 vec_oprnds1
.create (1);
6796 vec_oprnds2
.create (1);
6797 vec_oprnds3
.create (1);
6801 scalar_dest
= gimple_assign_lhs (stmt
);
6802 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6804 /* Handle cond expr. */
6805 for (j
= 0; j
< ncopies
; j
++)
6807 gassign
*new_stmt
= NULL
;
/* SLP branch (presumably j == 0 && slp_node — guard dropped):
   fetch all four operand def vectors at once via vect_get_slp_defs.  */
6812 auto_vec
<tree
, 4> ops
;
6813 auto_vec
<vec
<tree
>, 4> vec_defs
;
6815 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
6816 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
6817 ops
.safe_push (then_clause
);
6818 ops
.safe_push (else_clause
);
6819 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
/* Defs pop off in reverse push order: else, then, rhs, lhs.  */
6820 vec_oprnds3
= vec_defs
.pop ();
6821 vec_oprnds2
= vec_defs
.pop ();
6822 vec_oprnds1
= vec_defs
.pop ();
6823 vec_oprnds0
= vec_defs
.pop ();
6826 vec_defs
.release ();
/* Non-SLP, first copy (j == 0): create initial vector defs for each
   operand; DTS[0..3] record def types for later stmt copies.
   NOTE(review): ", >emp" below looks like a mangled "&gtemp" — an
   output gimple pointer argument; confirm against upstream source.  */
6832 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
6834 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
6835 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
6838 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
6840 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
6841 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
/* In a reduction, REDUC_INDEX selects which clause is the reduction
   variable: 1 -> THEN, 2 -> ELSE (see header comment).  */
6842 if (reduc_index
== 1)
6843 vec_then_clause
= reduc_def
;
6846 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
6848 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
6849 NULL
, >emp
, &def
, &dts
[2]);
6851 if (reduc_index
== 2)
6852 vec_else_clause
= reduc_def
;
6855 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
6857 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
6858 NULL
, >emp
, &def
, &dts
[3]);
/* Subsequent copies (j > 0): derive each def from the previous copy
   using the recorded def types.  */
6864 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
6865 vec_oprnds0
.pop ());
6866 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
6867 vec_oprnds1
.pop ());
6868 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
6869 vec_oprnds2
.pop ());
6870 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
6871 vec_oprnds3
.pop ());
/* Stash this copy's defs so the loop below can iterate them (and the
   next iteration can pop them).  */
6876 vec_oprnds0
.quick_push (vec_cond_lhs
);
6877 vec_oprnds1
.quick_push (vec_cond_rhs
);
6878 vec_oprnds2
.quick_push (vec_then_clause
);
6879 vec_oprnds3
.quick_push (vec_else_clause
);
6882 /* Arguments are ready. Create the new vector stmt. */
6883 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
6885 vec_cond_rhs
= vec_oprnds1
[i
];
6886 vec_then_clause
= vec_oprnds2
[i
];
6887 vec_else_clause
= vec_oprnds3
[i
];
/* Emit: vec_dest = VEC_COND_EXPR <lhs CODE rhs, then, else>, where
   the comparison reuses COND_EXPR's tree code at VEC_CMP_TYPE.  */
6889 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
6890 vec_cond_lhs
, vec_cond_rhs
);
6891 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6892 vec_compare
, vec_then_clause
, vec_else_clause
);
6894 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6895 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6896 gimple_assign_set_lhs (new_stmt
, new_temp
);
6897 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* SLP: record the vector stmt on the SLP node.  */
6899 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Non-SLP: chain copies via VEC_STMT / RELATED_STMT as usual.  */
6906 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6908 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6910 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the operand def vectors before returning.  */
6913 vec_oprnds0
.release ();
6914 vec_oprnds1
.release ();
6915 vec_oprnds2
.release ();
6916 vec_oprnds3
.release ();
6922 /* Make sure the statement is vectorizable. */
6925 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
6927 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6928 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6929 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
6931 tree scalar_type
, vectype
;
6932 gimple pattern_stmt
;
6933 gimple_seq pattern_def_seq
;
6935 if (dump_enabled_p ())
6937 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
6938 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6939 dump_printf (MSG_NOTE
, "\n");
6942 if (gimple_has_volatile_ops (stmt
))
6944 if (dump_enabled_p ())
6945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6946 "not vectorized: stmt has volatile operands\n");
6951 /* Skip stmts that do not need to be vectorized. In loops this is expected
6953 - the COND_EXPR which is the loop exit condition
6954 - any LABEL_EXPRs in the loop
6955 - computations that are used only for array indexing or loop control.
6956 In basic blocks we only analyze statements that are a part of some SLP
6957 instance, therefore, all the statements are relevant.
6959 Pattern statement needs to be analyzed instead of the original statement
6960 if the original statement is not relevant. Otherwise, we analyze both
6961 statements. In basic blocks we are called from some SLP instance
6962 traversal, don't analyze pattern stmts instead, the pattern stmts
6963 already will be part of SLP instance. */
6965 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
6966 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
6967 && !STMT_VINFO_LIVE_P (stmt_info
))
6969 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6971 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6972 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6974 /* Analyze PATTERN_STMT instead of the original stmt. */
6975 stmt
= pattern_stmt
;
6976 stmt_info
= vinfo_for_stmt (pattern_stmt
);
6977 if (dump_enabled_p ())
6979 dump_printf_loc (MSG_NOTE
, vect_location
,
6980 "==> examining pattern statement: ");
6981 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6982 dump_printf (MSG_NOTE
, "\n");
6987 if (dump_enabled_p ())
6988 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
6993 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6996 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6997 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6999 /* Analyze PATTERN_STMT too. */
7000 if (dump_enabled_p ())
7002 dump_printf_loc (MSG_NOTE
, vect_location
,
7003 "==> examining pattern statement: ");
7004 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7005 dump_printf (MSG_NOTE
, "\n");
7008 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7012 if (is_pattern_stmt_p (stmt_info
)
7014 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7016 gimple_stmt_iterator si
;
7018 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7020 gimple pattern_def_stmt
= gsi_stmt (si
);
7021 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7022 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7024 /* Analyze def stmt of STMT if it's a pattern stmt. */
7025 if (dump_enabled_p ())
7027 dump_printf_loc (MSG_NOTE
, vect_location
,
7028 "==> examining pattern def statement: ");
7029 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7030 dump_printf (MSG_NOTE
, "\n");
7033 if (!vect_analyze_stmt (pattern_def_stmt
,
7034 need_to_vectorize
, node
))
7040 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7042 case vect_internal_def
:
7045 case vect_reduction_def
:
7046 case vect_nested_cycle
:
7047 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7048 || relevance
== vect_used_in_outer_by_reduction
7049 || relevance
== vect_unused_in_scope
));
7052 case vect_induction_def
:
7053 case vect_constant_def
:
7054 case vect_external_def
:
7055 case vect_unknown_def_type
:
7062 gcc_assert (PURE_SLP_STMT (stmt_info
));
7064 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7065 if (dump_enabled_p ())
7067 dump_printf_loc (MSG_NOTE
, vect_location
,
7068 "get vectype for scalar type: ");
7069 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7070 dump_printf (MSG_NOTE
, "\n");
7073 vectype
= get_vectype_for_scalar_type (scalar_type
);
7076 if (dump_enabled_p ())
7078 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7079 "not SLPed: unsupported data-type ");
7080 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7082 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7087 if (dump_enabled_p ())
7089 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7090 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7091 dump_printf (MSG_NOTE
, "\n");
7094 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7097 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7099 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7100 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7101 || (is_gimple_call (stmt
)
7102 && gimple_call_lhs (stmt
) == NULL_TREE
));
7103 *need_to_vectorize
= true;
7108 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7109 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7110 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7111 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7112 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7113 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7114 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7115 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7116 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7117 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7118 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7119 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7123 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7124 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7125 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7126 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7127 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7128 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7129 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7130 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7131 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7136 if (dump_enabled_p ())
7138 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7139 "not vectorized: relevant stmt not ");
7140 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7141 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7142 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7151 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7152 need extra handling, except for vectorizable reductions. */
7153 if (STMT_VINFO_LIVE_P (stmt_info
)
7154 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7155 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7159 if (dump_enabled_p ())
7161 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7162 "not vectorized: live stmt not ");
7163 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7164 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7165 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7175 /* Function vect_transform_stmt.
7177 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7180 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7181 bool *grouped_store
, slp_tree slp_node
,
7182 slp_instance slp_node_instance
)
7184 bool is_store
= false;
7185 gimple vec_stmt
= NULL
;
7186 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7189 switch (STMT_VINFO_TYPE (stmt_info
))
7191 case type_demotion_vec_info_type
:
7192 case type_promotion_vec_info_type
:
7193 case type_conversion_vec_info_type
:
7194 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7198 case induc_vec_info_type
:
7199 gcc_assert (!slp_node
);
7200 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7204 case shift_vec_info_type
:
7205 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7209 case op_vec_info_type
:
7210 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7214 case assignment_vec_info_type
:
7215 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7219 case load_vec_info_type
:
7220 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7225 case store_vec_info_type
:
7226 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7228 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7230 /* In case of interleaving, the whole chain is vectorized when the
7231 last store in the chain is reached. Store stmts before the last
7232 one are skipped, and there vec_stmt_info shouldn't be freed
7234 *grouped_store
= true;
7235 if (STMT_VINFO_VEC_STMT (stmt_info
))
7242 case condition_vec_info_type
:
7243 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7247 case call_vec_info_type
:
7248 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7249 stmt
= gsi_stmt (*gsi
);
7250 if (is_gimple_call (stmt
)
7251 && gimple_call_internal_p (stmt
)
7252 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7256 case call_simd_clone_vec_info_type
:
7257 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7258 stmt
= gsi_stmt (*gsi
);
7261 case reduc_vec_info_type
:
7262 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7267 if (!STMT_VINFO_LIVE_P (stmt_info
))
7269 if (dump_enabled_p ())
7270 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7271 "stmt not supported.\n");
7276 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7277 is being vectorized, but outside the immediately enclosing loop. */
7279 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7280 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7281 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7282 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7283 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7284 || STMT_VINFO_RELEVANT (stmt_info
) ==
7285 vect_used_in_outer_by_reduction
))
7287 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7288 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7289 imm_use_iterator imm_iter
;
7290 use_operand_p use_p
;
7294 if (dump_enabled_p ())
7295 dump_printf_loc (MSG_NOTE
, vect_location
,
7296 "Record the vdef for outer-loop vectorization.\n");
7298 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7299 (to be used when vectorizing outer-loop stmts that use the DEF of
7301 if (gimple_code (stmt
) == GIMPLE_PHI
)
7302 scalar_dest
= PHI_RESULT (stmt
);
7304 scalar_dest
= gimple_assign_lhs (stmt
);
7306 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7308 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7310 exit_phi
= USE_STMT (use_p
);
7311 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7316 /* Handle stmts whose DEF is used outside the loop-nest that is
7317 being vectorized. */
7318 if (STMT_VINFO_LIVE_P (stmt_info
)
7319 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7321 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7326 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7332 /* Remove a group of stores (for SLP or interleaving), free their
7336 vect_remove_stores (gimple first_stmt
)
7338 gimple next
= first_stmt
;
7340 gimple_stmt_iterator next_si
;
7344 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7346 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7347 if (is_pattern_stmt_p (stmt_info
))
7348 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7349 /* Free the attached stmt_vec_info and remove the stmt. */
7350 next_si
= gsi_for_stmt (next
);
7351 unlink_stmt_vdef (next
);
7352 gsi_remove (&next_si
, true);
7353 release_defs (next
);
7354 free_stmt_vec_info (next
);
7360 /* Function new_stmt_vec_info.
7362 Create and initialize a new stmt_vec_info struct for STMT. */
7365 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7366 bb_vec_info bb_vinfo
)
7369 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7371 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7372 STMT_VINFO_STMT (res
) = stmt
;
7373 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7374 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7375 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7376 STMT_VINFO_LIVE_P (res
) = false;
7377 STMT_VINFO_VECTYPE (res
) = NULL
;
7378 STMT_VINFO_VEC_STMT (res
) = NULL
;
7379 STMT_VINFO_VECTORIZABLE (res
) = true;
7380 STMT_VINFO_IN_PATTERN_P (res
) = false;
7381 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7382 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7383 STMT_VINFO_DATA_REF (res
) = NULL
;
7385 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7386 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7387 STMT_VINFO_DR_INIT (res
) = NULL
;
7388 STMT_VINFO_DR_STEP (res
) = NULL
;
7389 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7391 if (gimple_code (stmt
) == GIMPLE_PHI
7392 && is_loop_header_bb_p (gimple_bb (stmt
)))
7393 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7395 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7397 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7398 STMT_SLP_TYPE (res
) = loop_vect
;
7399 GROUP_FIRST_ELEMENT (res
) = NULL
;
7400 GROUP_NEXT_ELEMENT (res
) = NULL
;
7401 GROUP_SIZE (res
) = 0;
7402 GROUP_STORE_COUNT (res
) = 0;
7403 GROUP_GAP (res
) = 0;
7404 GROUP_SAME_DR_STMT (res
) = NULL
;
7410 /* Create a hash table for stmt_vec_info. */
7413 init_stmt_vec_info_vec (void)
7415 gcc_assert (!stmt_vec_info_vec
.exists ());
7416 stmt_vec_info_vec
.create (50);
7420 /* Free hash table for stmt_vec_info. */
7423 free_stmt_vec_info_vec (void)
7427 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7429 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7430 gcc_assert (stmt_vec_info_vec
.exists ());
7431 stmt_vec_info_vec
.release ();
7435 /* Free stmt vectorization related info. */
7438 free_stmt_vec_info (gimple stmt
)
7440 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7445 /* Check if this statement has a related "pattern stmt"
7446 (introduced by the vectorizer during the pattern recognition
7447 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7449 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7451 stmt_vec_info patt_info
7452 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7455 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7456 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7457 gimple_set_bb (patt_stmt
, NULL
);
7458 tree lhs
= gimple_get_lhs (patt_stmt
);
7459 if (TREE_CODE (lhs
) == SSA_NAME
)
7460 release_ssa_name (lhs
);
7463 gimple_stmt_iterator si
;
7464 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7466 gimple seq_stmt
= gsi_stmt (si
);
7467 gimple_set_bb (seq_stmt
, NULL
);
7468 lhs
= gimple_get_lhs (patt_stmt
);
7469 if (TREE_CODE (lhs
) == SSA_NAME
)
7470 release_ssa_name (lhs
);
7471 free_stmt_vec_info (seq_stmt
);
7474 free_stmt_vec_info (patt_stmt
);
7478 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7479 set_vinfo_for_stmt (stmt
, NULL
);
7484 /* Function get_vectype_for_scalar_type_and_size.
7486 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7490 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7492 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7493 enum machine_mode simd_mode
;
7494 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7501 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7502 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7505 /* For vector types of elements whose mode precision doesn't
7506 match their types precision we use a element type of mode
7507 precision. The vectorization routines will have to make sure
7508 they support the proper result truncation/extension.
7509 We also make sure to build vector types with INTEGER_TYPE
7510 component type only. */
7511 if (INTEGRAL_TYPE_P (scalar_type
)
7512 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7513 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7514 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7515 TYPE_UNSIGNED (scalar_type
));
7517 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7518 When the component mode passes the above test simply use a type
7519 corresponding to that mode. The theory is that any use that
7520 would cause problems with this will disable vectorization anyway. */
7521 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7522 && !INTEGRAL_TYPE_P (scalar_type
))
7523 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7525 /* We can't build a vector type of elements with alignment bigger than
7527 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7528 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7529 TYPE_UNSIGNED (scalar_type
));
7531 /* If we felt back to using the mode fail if there was
7532 no scalar type for it. */
7533 if (scalar_type
== NULL_TREE
)
7536 /* If no size was supplied use the mode the target prefers. Otherwise
7537 lookup a vector mode of the specified size. */
7539 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7541 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7542 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7546 vectype
= build_vector_type (scalar_type
, nunits
);
7548 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7549 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7555 unsigned int current_vector_size
;
7557 /* Function get_vectype_for_scalar_type.
7559 Returns the vector type corresponding to SCALAR_TYPE as supported
7563 get_vectype_for_scalar_type (tree scalar_type
)
7566 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7567 current_vector_size
);
7569 && current_vector_size
== 0)
7570 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7574 /* Function get_same_sized_vectype
7576 Returns a vector type corresponding to SCALAR_TYPE of size
7577 VECTOR_TYPE if supported by the target. */
7580 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7582 return get_vectype_for_scalar_type_and_size
7583 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7586 /* Function vect_is_simple_use.
7589 LOOP_VINFO - the vect info of the loop that is being vectorized.
7590 BB_VINFO - the vect info of the basic block that is being vectorized.
7591 OPERAND - operand of STMT in the loop or bb.
7592 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7594 Returns whether a stmt with OPERAND can be vectorized.
7595 For loops, supportable operands are constants, loop invariants, and operands
7596 that are defined by the current iteration of the loop. Unsupportable
7597 operands are those that are defined by a previous iteration of the loop (as
7598 is the case in reduction/induction computations).
7599 For basic blocks, supportable operands are constants and bb invariants.
7600 For now, operands defined outside the basic block are not supported. */
7603 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7604 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7605 tree
*def
, enum vect_def_type
*dt
)
7608 stmt_vec_info stmt_vinfo
;
7609 struct loop
*loop
= NULL
;
7612 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7617 if (dump_enabled_p ())
7619 dump_printf_loc (MSG_NOTE
, vect_location
,
7620 "vect_is_simple_use: operand ");
7621 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7622 dump_printf (MSG_NOTE
, "\n");
7625 if (CONSTANT_CLASS_P (operand
))
7627 *dt
= vect_constant_def
;
7631 if (is_gimple_min_invariant (operand
))
7634 *dt
= vect_external_def
;
7638 if (TREE_CODE (operand
) == PAREN_EXPR
)
7640 if (dump_enabled_p ())
7641 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7642 operand
= TREE_OPERAND (operand
, 0);
7645 if (TREE_CODE (operand
) != SSA_NAME
)
7647 if (dump_enabled_p ())
7648 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7653 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7654 if (*def_stmt
== NULL
)
7656 if (dump_enabled_p ())
7657 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7662 if (dump_enabled_p ())
7664 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7665 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7666 dump_printf (MSG_NOTE
, "\n");
7669 /* Empty stmt is expected only in case of a function argument.
7670 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7671 if (gimple_nop_p (*def_stmt
))
7674 *dt
= vect_external_def
;
7678 bb
= gimple_bb (*def_stmt
);
7680 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7681 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7682 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7683 *dt
= vect_external_def
;
7686 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7687 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7690 if (*dt
== vect_unknown_def_type
7692 && *dt
== vect_double_reduction_def
7693 && gimple_code (stmt
) != GIMPLE_PHI
))
7695 if (dump_enabled_p ())
7696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7697 "Unsupported pattern.\n");
7701 if (dump_enabled_p ())
7702 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
7704 switch (gimple_code (*def_stmt
))
7707 *def
= gimple_phi_result (*def_stmt
);
7711 *def
= gimple_assign_lhs (*def_stmt
);
7715 *def
= gimple_call_lhs (*def_stmt
);
7720 if (dump_enabled_p ())
7721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7722 "unsupported defining stmt:\n");
7729 /* Function vect_is_simple_use_1.
7731 Same as vect_is_simple_use_1 but also determines the vector operand
7732 type of OPERAND and stores it to *VECTYPE. If the definition of
7733 OPERAND is vect_uninitialized_def, vect_constant_def or
7734 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7735 is responsible to compute the best suited vector type for the
7739 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7740 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7741 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
7743 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
7747 /* Now get a vector type if the def is internal, otherwise supply
7748 NULL_TREE and leave it up to the caller to figure out a proper
7749 type for the use stmt. */
7750 if (*dt
== vect_internal_def
7751 || *dt
== vect_induction_def
7752 || *dt
== vect_reduction_def
7753 || *dt
== vect_double_reduction_def
7754 || *dt
== vect_nested_cycle
)
7756 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
7758 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7759 && !STMT_VINFO_RELEVANT (stmt_info
)
7760 && !STMT_VINFO_LIVE_P (stmt_info
))
7761 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7763 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7764 gcc_assert (*vectype
!= NULL_TREE
);
7766 else if (*dt
== vect_uninitialized_def
7767 || *dt
== vect_constant_def
7768 || *dt
== vect_external_def
)
7769 *vectype
= NULL_TREE
;
7777 /* Function supportable_widening_operation
7779 Check whether an operation represented by the code CODE is a
7780 widening operation that is supported by the target platform in
7781 vector form (i.e., when operating on arguments of type VECTYPE_IN
7782 producing a result of type VECTYPE_OUT).
7784 Widening operations we currently support are NOP (CONVERT), FLOAT
7785 and WIDEN_MULT. This function checks if these operations are supported
7786 by the target platform either directly (via vector tree-codes), or via
7790 - CODE1 and CODE2 are codes of vector operations to be used when
7791 vectorizing the operation, if available.
7792 - MULTI_STEP_CVT determines the number of required intermediate steps in
7793 case of multi-step conversion (like char->short->int - in that case
7794 MULTI_STEP_CVT will be 1).
7795 - INTERM_TYPES contains the intermediate type required to perform the
7796 widening operation (short in the above example). */
7799 supportable_widening_operation (enum tree_code code
, gimple stmt
,
7800 tree vectype_out
, tree vectype_in
,
7801 enum tree_code
*code1
, enum tree_code
*code2
,
7802 int *multi_step_cvt
,
7803 vec
<tree
> *interm_types
)
7805 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7806 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7807 struct loop
*vect_loop
= NULL
;
7808 enum machine_mode vec_mode
;
7809 enum insn_code icode1
, icode2
;
7810 optab optab1
, optab2
;
7811 tree vectype
= vectype_in
;
7812 tree wide_vectype
= vectype_out
;
7813 enum tree_code c1
, c2
;
7815 tree prev_type
, intermediate_type
;
7816 enum machine_mode intermediate_mode
, prev_mode
;
7817 optab optab3
, optab4
;
7819 *multi_step_cvt
= 0;
7821 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
7825 case WIDEN_MULT_EXPR
:
7826 /* The result of a vectorized widening operation usually requires
7827 two vectors (because the widened results do not fit into one vector).
7828 The generated vector results would normally be expected to be
7829 generated in the same order as in the original scalar computation,
7830 i.e. if 8 results are generated in each vector iteration, they are
7831 to be organized as follows:
7832 vect1: [res1,res2,res3,res4],
7833 vect2: [res5,res6,res7,res8].
7835 However, in the special case that the result of the widening
7836 operation is used in a reduction computation only, the order doesn't
7837 matter (because when vectorizing a reduction we change the order of
7838 the computation). Some targets can take advantage of this and
7839 generate more efficient code. For example, targets like Altivec,
7840 that support widen_mult using a sequence of {mult_even,mult_odd}
7841 generate the following vectors:
7842 vect1: [res1,res3,res5,res7],
7843 vect2: [res2,res4,res6,res8].
7845 When vectorizing outer-loops, we execute the inner-loop sequentially
7846 (each vectorized inner-loop iteration contributes to VF outer-loop
7847 iterations in parallel). We therefore don't allow to change the
7848 order of the computation in the inner-loop during outer-loop
7850 /* TODO: Another case in which order doesn't *really* matter is when we
7851 widen and then contract again, e.g. (short)((int)x * y >> 8).
7852 Normally, pack_trunc performs an even/odd permute, whereas the
7853 repack from an even/odd expansion would be an interleave, which
7854 would be significantly simpler for e.g. AVX2. */
7855 /* In any case, in order to avoid duplicating the code below, recurse
7856 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7857 are properly set up for the caller. If we fail, we'll continue with
7858 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7860 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
7861 && !nested_in_vect_loop_p (vect_loop
, stmt
)
7862 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
7863 stmt
, vectype_out
, vectype_in
,
7864 code1
, code2
, multi_step_cvt
,
7867 /* Elements in a vector with vect_used_by_reduction property cannot
7868 be reordered if the use chain with this property does not have the
7869 same operation. One such an example is s += a * b, where elements
7870 in a and b cannot be reordered. Here we check if the vector defined
7871 by STMT is only directly used in the reduction statement. */
7872 tree lhs
= gimple_assign_lhs (stmt
);
7873 use_operand_p dummy
;
7875 stmt_vec_info use_stmt_info
= NULL
;
7876 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
7877 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
7878 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
7881 c1
= VEC_WIDEN_MULT_LO_EXPR
;
7882 c2
= VEC_WIDEN_MULT_HI_EXPR
;
7885 case VEC_WIDEN_MULT_EVEN_EXPR
:
7886 /* Support the recursion induced just above. */
7887 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
7888 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
7891 case WIDEN_LSHIFT_EXPR
:
7892 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
7893 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
7897 c1
= VEC_UNPACK_LO_EXPR
;
7898 c2
= VEC_UNPACK_HI_EXPR
;
7902 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
7903 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
7906 case FIX_TRUNC_EXPR
:
7907 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7908 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7909 computing the operation. */
7916 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
7918 enum tree_code ctmp
= c1
;
7923 if (code
== FIX_TRUNC_EXPR
)
7925 /* The signedness is determined from output operand. */
7926 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7927 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
7931 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7932 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
7935 if (!optab1
|| !optab2
)
7938 vec_mode
= TYPE_MODE (vectype
);
7939 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
7940 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
7946 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7947 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7950 /* Check if it's a multi-step conversion that can be done using intermediate
7953 prev_type
= vectype
;
7954 prev_mode
= vec_mode
;
7956 if (!CONVERT_EXPR_CODE_P (code
))
7959 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7960 intermediate steps in promotion sequence. We try
7961 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
7963 interm_types
->create (MAX_INTERM_CVT_STEPS
);
7964 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
7966 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
7968 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
7969 TYPE_UNSIGNED (prev_type
));
7970 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
7971 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
7973 if (!optab3
|| !optab4
7974 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
7975 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
7976 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
7977 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
7978 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
7979 == CODE_FOR_nothing
)
7980 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
7981 == CODE_FOR_nothing
))
7984 interm_types
->quick_push (intermediate_type
);
7985 (*multi_step_cvt
)++;
7987 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7988 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7991 prev_type
= intermediate_type
;
7992 prev_mode
= intermediate_mode
;
7995 interm_types
->release ();
8000 /* Function supportable_narrowing_operation
8002 Check whether an operation represented by the code CODE is a
8003 narrowing operation that is supported by the target platform in
8004 vector form (i.e., when operating on arguments of type VECTYPE_IN
8005 and producing a result of type VECTYPE_OUT).
8007 Narrowing operations we currently support are NOP (CONVERT) and
8008 FIX_TRUNC. This function checks if these operations are supported by
8009 the target platform directly via vector tree-codes.
8012 - CODE1 is the code of a vector operation to be used when
8013 vectorizing the operation, if available.
8014 - MULTI_STEP_CVT determines the number of required intermediate steps in
8015 case of multi-step conversion (like int->short->char - in that case
8016 MULTI_STEP_CVT will be 1).
8017 - INTERM_TYPES contains the intermediate type required to perform the
8018 narrowing operation (short in the above example). */
8021 supportable_narrowing_operation (enum tree_code code
,
8022 tree vectype_out
, tree vectype_in
,
8023 enum tree_code
*code1
, int *multi_step_cvt
,
8024 vec
<tree
> *interm_types
)
8026 enum machine_mode vec_mode
;
8027 enum insn_code icode1
;
8028 optab optab1
, interm_optab
;
8029 tree vectype
= vectype_in
;
8030 tree narrow_vectype
= vectype_out
;
8032 tree intermediate_type
;
8033 enum machine_mode intermediate_mode
, prev_mode
;
8037 *multi_step_cvt
= 0;
8041 c1
= VEC_PACK_TRUNC_EXPR
;
8044 case FIX_TRUNC_EXPR
:
8045 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8049 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8050 tree code and optabs used for computing the operation. */
8057 if (code
== FIX_TRUNC_EXPR
)
8058 /* The signedness is determined from output operand. */
8059 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8061 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8066 vec_mode
= TYPE_MODE (vectype
);
8067 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8072 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8075 /* Check if it's a multi-step conversion that can be done using intermediate
8077 prev_mode
= vec_mode
;
8078 if (code
== FIX_TRUNC_EXPR
)
8079 uns
= TYPE_UNSIGNED (vectype_out
);
8081 uns
= TYPE_UNSIGNED (vectype
);
8083 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8084 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8085 costly than signed. */
8086 if (code
== FIX_TRUNC_EXPR
&& uns
)
8088 enum insn_code icode2
;
8091 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8093 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8094 if (interm_optab
!= unknown_optab
8095 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8096 && insn_data
[icode1
].operand
[0].mode
8097 == insn_data
[icode2
].operand
[0].mode
)
8100 optab1
= interm_optab
;
8105 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8106 intermediate steps in promotion sequence. We try
8107 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8108 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8109 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8111 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8113 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8115 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8118 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8119 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8120 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8121 == CODE_FOR_nothing
))
8124 interm_types
->quick_push (intermediate_type
);
8125 (*multi_step_cvt
)++;
8127 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8130 prev_mode
= intermediate_mode
;
8131 optab1
= interm_optab
;
8134 interm_types
->release ();