1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "stor-layout.h"
35 #include "hard-reg-set.h"
38 #include "dominance.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
45 #include "gimple-expr.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "insn-codes.h"
65 #include "diagnostic-core.h"
66 #include "tree-vectorizer.h"
69 #include "plugin-api.h"
74 /* For lang_hooks.types.type_for_mode. */
75 #include "langhooks.h"
77 /* Return the vectorized type for the given statement. */
80 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
82 return STMT_VINFO_VECTYPE (stmt_info
);
85 /* Return TRUE iff the given statement is in an inner loop relative to
86 the loop being vectorized. */
88 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
90 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
91 basic_block bb
= gimple_bb (stmt
);
92 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
98 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
100 return (bb
->loop_father
== loop
->inner
);
103 /* Record the cost of a statement, either by directly informing the
104 target model or by saving it in a vector for later processing.
105 Return a preliminary estimate of the statement's cost. */
108 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
109 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
110 int misalign
, enum vect_cost_model_location where
)
114 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
115 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
116 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
119 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
124 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
125 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
126 void *target_cost_data
;
129 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
131 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
133 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
138 /* Return a variable of type ELEM_TYPE[NELEMS]. */
141 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
143 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Return an SSA_NAME for the vector in index N. The reference
149 is part of the vectorization of STMT and the vector is associated
150 with scalar destination SCALAR_DEST. */
153 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
154 tree array
, unsigned HOST_WIDE_INT n
)
156 tree vect_type
, vect
, vect_name
, array_ref
;
159 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
160 vect_type
= TREE_TYPE (TREE_TYPE (array
));
161 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
162 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
163 build_int_cst (size_type_node
, n
),
164 NULL_TREE
, NULL_TREE
);
166 new_stmt
= gimple_build_assign (vect
, array_ref
);
167 vect_name
= make_ssa_name (vect
, new_stmt
);
168 gimple_assign_set_lhs (new_stmt
, vect_name
);
169 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
174 /* ARRAY is an array of vectors created by create_vector_array.
175 Emit code to store SSA_NAME VECT in index N of the array.
176 The store is part of the vectorization of STMT. */
179 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
180 tree array
, unsigned HOST_WIDE_INT n
)
185 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
186 build_int_cst (size_type_node
, n
),
187 NULL_TREE
, NULL_TREE
);
189 new_stmt
= gimple_build_assign (array_ref
, vect
);
190 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
193 /* PTR is a pointer to an array of type TYPE. Return a representation
194 of *PTR. The memory reference replaces those in FIRST_DR
198 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
200 tree mem_ref
, alias_ptr_type
;
202 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
203 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
204 /* Arrays have the same alignment as their type. */
205 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
209 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
211 /* Function vect_mark_relevant.
213 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
216 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
217 enum vect_relevant relevant
, bool live_p
,
218 bool used_in_pattern
)
220 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
221 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
222 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "mark relevant %d, live %d.\n", relevant
, live_p
);
229 /* If this stmt is an original stmt in a pattern, we might need to mark its
230 related pattern stmt instead of the original stmt. However, such stmts
231 may have their own uses that are not in any pattern, in such cases the
232 stmt itself should be marked. */
233 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
236 if (!used_in_pattern
)
238 imm_use_iterator imm_iter
;
242 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
243 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
245 if (is_gimple_assign (stmt
))
246 lhs
= gimple_assign_lhs (stmt
);
248 lhs
= gimple_call_lhs (stmt
);
250 /* This use is out of pattern use, if LHS has other uses that are
251 pattern uses, we should mark the stmt itself, and not the pattern
253 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
254 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
256 if (is_gimple_debug (USE_STMT (use_p
)))
258 use_stmt
= USE_STMT (use_p
);
260 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
263 if (vinfo_for_stmt (use_stmt
)
264 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
274 /* This is the last stmt in a sequence that was detected as a
275 pattern that can potentially be vectorized. Don't mark the stmt
276 as relevant/live because it's not going to be vectorized.
277 Instead mark the pattern-stmt that replaces it. */
279 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
281 if (dump_enabled_p ())
282 dump_printf_loc (MSG_NOTE
, vect_location
,
283 "last stmt in pattern. don't mark"
284 " relevant/live.\n");
285 stmt_info
= vinfo_for_stmt (pattern_stmt
);
286 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
287 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
288 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
293 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
294 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
295 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
297 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
298 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
300 if (dump_enabled_p ())
301 dump_printf_loc (MSG_NOTE
, vect_location
,
302 "already marked relevant/live.\n");
306 worklist
->safe_push (stmt
);
310 /* Function vect_stmt_relevant_p.
312 Return true if STMT in loop that is represented by LOOP_VINFO is
313 "relevant for vectorization".
315 A stmt is considered "relevant for vectorization" if:
316 - it has uses outside the loop.
317 - it has vdefs (it alters memory).
318 - control stmts in the loop (except for the exit condition).
320 CHECKME: what other side effects would the vectorizer allow? */
323 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
324 enum vect_relevant
*relevant
, bool *live_p
)
326 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
328 imm_use_iterator imm_iter
;
332 *relevant
= vect_unused_in_scope
;
335 /* cond stmt other than loop exit cond. */
336 if (is_ctrl_stmt (stmt
)
337 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
338 != loop_exit_ctrl_vec_info_type
)
339 *relevant
= vect_used_in_scope
;
341 /* changing memory. */
342 if (gimple_code (stmt
) != GIMPLE_PHI
)
343 if (gimple_vdef (stmt
))
345 if (dump_enabled_p ())
346 dump_printf_loc (MSG_NOTE
, vect_location
,
347 "vec_stmt_relevant_p: stmt has vdefs.\n");
348 *relevant
= vect_used_in_scope
;
351 /* uses outside the loop. */
352 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
354 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
356 basic_block bb
= gimple_bb (USE_STMT (use_p
));
357 if (!flow_bb_inside_loop_p (loop
, bb
))
359 if (dump_enabled_p ())
360 dump_printf_loc (MSG_NOTE
, vect_location
,
361 "vec_stmt_relevant_p: used out of loop.\n");
363 if (is_gimple_debug (USE_STMT (use_p
)))
366 /* We expect all such uses to be in the loop exit phis
367 (because of loop closed form) */
368 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
369 gcc_assert (bb
== single_exit (loop
)->dest
);
376 return (*live_p
|| *relevant
);
380 /* Function exist_non_indexing_operands_for_use_p
382 USE is one of the uses attached to STMT. Check if USE is
383 used in STMT for anything other than indexing an array. */
386 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
389 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
391 /* USE corresponds to some operand in STMT. If there is no data
392 reference in STMT, then any operand that corresponds to USE
393 is not indexing an array. */
394 if (!STMT_VINFO_DATA_REF (stmt_info
))
397 /* STMT has a data_ref. FORNOW this means that its of one of
401 (This should have been verified in analyze_data_refs).
403 'var' in the second case corresponds to a def, not a use,
404 so USE cannot correspond to any operands that are not used
407 Therefore, all we need to check is if STMT falls into the
408 first case, and whether var corresponds to USE. */
410 if (!gimple_assign_copy_p (stmt
))
412 if (is_gimple_call (stmt
)
413 && gimple_call_internal_p (stmt
))
414 switch (gimple_call_internal_fn (stmt
))
417 operand
= gimple_call_arg (stmt
, 3);
422 operand
= gimple_call_arg (stmt
, 2);
432 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
434 operand
= gimple_assign_rhs1 (stmt
);
435 if (TREE_CODE (operand
) != SSA_NAME
)
446 Function process_use.
449 - a USE in STMT in a loop represented by LOOP_VINFO
450 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
451 that defined USE. This is done by calling mark_relevant and passing it
452 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
453 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
457 Generally, LIVE_P and RELEVANT are used to define the liveness and
458 relevance info of the DEF_STMT of this USE:
459 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
460 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
462 - case 1: If USE is used only for address computations (e.g. array indexing),
463 which does not need to be directly vectorized, then the liveness/relevance
464 of the respective DEF_STMT is left unchanged.
465 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
466 skip DEF_STMT cause it had already been processed.
467 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
468 be modified accordingly.
470 Return true if everything is as expected. Return false otherwise. */
473 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
474 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
477 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
478 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
479 stmt_vec_info dstmt_vinfo
;
480 basic_block bb
, def_bb
;
483 enum vect_def_type dt
;
485 /* case 1: we are only interested in uses that need to be vectorized. Uses
486 that are used for address computation are not considered relevant. */
487 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
490 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
494 "not vectorized: unsupported use in stmt.\n");
498 if (!def_stmt
|| gimple_nop_p (def_stmt
))
501 def_bb
= gimple_bb (def_stmt
);
502 if (!flow_bb_inside_loop_p (loop
, def_bb
))
504 if (dump_enabled_p ())
505 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
509 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
510 DEF_STMT must have already been processed, because this should be the
511 only way that STMT, which is a reduction-phi, was put in the worklist,
512 as there should be no other uses for DEF_STMT in the loop. So we just
513 check that everything is as expected, and we are done. */
514 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
515 bb
= gimple_bb (stmt
);
516 if (gimple_code (stmt
) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
518 && gimple_code (def_stmt
) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
520 && bb
->loop_father
== def_bb
->loop_father
)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE
, vect_location
,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
526 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
527 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
528 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
529 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
533 /* case 3a: outer-loop stmt defining an inner-loop stmt:
534 outer-loop-header-bb:
540 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
542 if (dump_enabled_p ())
543 dump_printf_loc (MSG_NOTE
, vect_location
,
544 "outer-loop def-stmt defining inner-loop stmt.\n");
548 case vect_unused_in_scope
:
549 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
550 vect_used_in_scope
: vect_unused_in_scope
;
553 case vect_used_in_outer_by_reduction
:
554 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
555 relevant
= vect_used_by_reduction
;
558 case vect_used_in_outer
:
559 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
560 relevant
= vect_used_in_scope
;
563 case vect_used_in_scope
:
571 /* case 3b: inner-loop stmt defining an outer-loop stmt:
572 outer-loop-header-bb:
576 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
578 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
580 if (dump_enabled_p ())
581 dump_printf_loc (MSG_NOTE
, vect_location
,
582 "inner-loop def-stmt defining outer-loop stmt.\n");
586 case vect_unused_in_scope
:
587 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
588 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
589 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
592 case vect_used_by_reduction
:
593 relevant
= vect_used_in_outer_by_reduction
;
596 case vect_used_in_scope
:
597 relevant
= vect_used_in_outer
;
605 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
606 is_pattern_stmt_p (stmt_vinfo
));
611 /* Function vect_mark_stmts_to_be_vectorized.
613 Not all stmts in the loop need to be vectorized. For example:
622 Stmt 1 and 3 do not need to be vectorized, because loop control and
623 addressing of vectorized data-refs are handled differently.
625 This pass detects such stmts. */
628 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
630 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
631 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
632 unsigned int nbbs
= loop
->num_nodes
;
633 gimple_stmt_iterator si
;
636 stmt_vec_info stmt_vinfo
;
640 enum vect_relevant relevant
, tmp_relevant
;
641 enum vect_def_type def_type
;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE
, vect_location
,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec
<gimple
, 64> worklist
;
649 /* 1. Init worklist. */
650 for (i
= 0; i
< nbbs
; i
++)
653 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
660 dump_printf (MSG_NOTE
, "\n");
663 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
664 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
666 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
668 stmt
= gsi_stmt (si
);
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
672 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
673 dump_printf (MSG_NOTE
, "\n");
676 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
677 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
681 /* 2. Process_worklist */
682 while (worklist
.length () > 0)
687 stmt
= worklist
.pop ();
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
691 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
692 dump_printf (MSG_NOTE
, "\n");
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant and live/dead according to the
697 liveness and relevance properties of STMT. */
698 stmt_vinfo
= vinfo_for_stmt (stmt
);
699 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
700 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
702 /* Generally, the liveness and relevance properties of STMT are
703 propagated as is to the DEF_STMTs of its USEs:
704 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
705 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
707 One exception is when STMT has been identified as defining a reduction
708 variable; in this case we set the liveness/relevance as follows:
710 relevant = vect_used_by_reduction
711 This is because we distinguish between two kinds of relevant stmts -
712 those that are used by a reduction computation, and those that are
713 (also) used by a regular computation. This allows us later on to
714 identify stmts that are used solely by a reduction, and therefore the
715 order of the results that they produce does not have to be kept. */
717 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
718 tmp_relevant
= relevant
;
721 case vect_reduction_def
:
722 switch (tmp_relevant
)
724 case vect_unused_in_scope
:
725 relevant
= vect_used_by_reduction
;
728 case vect_used_by_reduction
:
729 if (gimple_code (stmt
) == GIMPLE_PHI
)
734 if (dump_enabled_p ())
735 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
736 "unsupported use of reduction.\n");
743 case vect_nested_cycle
:
744 if (tmp_relevant
!= vect_unused_in_scope
745 && tmp_relevant
!= vect_used_in_outer_by_reduction
746 && tmp_relevant
!= vect_used_in_outer
)
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
750 "unsupported use of nested cycle.\n");
758 case vect_double_reduction_def
:
759 if (tmp_relevant
!= vect_unused_in_scope
760 && tmp_relevant
!= vect_used_by_reduction
)
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
764 "unsupported use of double reduction.\n");
776 if (is_pattern_stmt_p (stmt_vinfo
))
778 /* Pattern statements are not inserted into the code, so
779 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
780 have to scan the RHS or function arguments instead. */
781 if (is_gimple_assign (stmt
))
783 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
784 tree op
= gimple_assign_rhs1 (stmt
);
787 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
789 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
790 live_p
, relevant
, &worklist
, false)
791 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
792 live_p
, relevant
, &worklist
, false))
796 for (; i
< gimple_num_ops (stmt
); i
++)
798 op
= gimple_op (stmt
, i
);
799 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
804 else if (is_gimple_call (stmt
))
806 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
808 tree arg
= gimple_call_arg (stmt
, i
);
809 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
816 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
818 tree op
= USE_FROM_PTR (use_p
);
819 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
824 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
827 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
829 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
833 } /* while worklist */
839 /* Function vect_model_simple_cost.
841 Models cost for simple operations, i.e. those that only emit ncopies of a
842 single op. Right now, this does not account for multiple insns that could
843 be generated for the single vector op. We will handle that shortly. */
846 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
847 enum vect_def_type
*dt
,
848 stmt_vector_for_cost
*prologue_cost_vec
,
849 stmt_vector_for_cost
*body_cost_vec
)
852 int inside_cost
= 0, prologue_cost
= 0;
854 /* The SLP costs were already calculated during SLP tree build. */
855 if (PURE_SLP_STMT (stmt_info
))
858 /* FORNOW: Assuming maximum 2 args per stmts. */
859 for (i
= 0; i
< 2; i
++)
860 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
861 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
862 stmt_info
, 0, vect_prologue
);
864 /* Pass the inside-of-loop statements to the target-specific cost model. */
865 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
866 stmt_info
, 0, vect_body
);
868 if (dump_enabled_p ())
869 dump_printf_loc (MSG_NOTE
, vect_location
,
870 "vect_model_simple_cost: inside_cost = %d, "
871 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
875 /* Model cost for type demotion and promotion operations. PWR is normally
876 zero for single-step promotions and demotions. It will be one if
877 two-step promotion/demotion is required, and so on. Each additional
878 step doubles the number of instructions required. */
881 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
882 enum vect_def_type
*dt
, int pwr
)
885 int inside_cost
= 0, prologue_cost
= 0;
886 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
887 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
888 void *target_cost_data
;
890 /* The SLP costs were already calculated during SLP tree build. */
891 if (PURE_SLP_STMT (stmt_info
))
895 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
897 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
899 for (i
= 0; i
< pwr
+ 1; i
++)
901 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
903 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
904 vec_promote_demote
, stmt_info
, 0,
908 /* FORNOW: Assuming maximum 2 args per stmts. */
909 for (i
= 0; i
< 2; i
++)
910 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
911 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
912 stmt_info
, 0, vect_prologue
);
914 if (dump_enabled_p ())
915 dump_printf_loc (MSG_NOTE
, vect_location
,
916 "vect_model_promotion_demotion_cost: inside_cost = %d, "
917 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
920 /* Function vect_cost_group_size
922 For grouped load or store, return the group_size only if it is the first
923 load or store of a group, else return 1. This ensures that group size is
924 only returned once per group. */
927 vect_cost_group_size (stmt_vec_info stmt_info
)
929 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
931 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
932 return GROUP_SIZE (stmt_info
);
938 /* Function vect_model_store_cost
940 Models cost for stores. In the case of grouped accesses, one access
941 has the overhead of the grouped access attributed to it. */
944 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
945 bool store_lanes_p
, enum vect_def_type dt
,
947 stmt_vector_for_cost
*prologue_cost_vec
,
948 stmt_vector_for_cost
*body_cost_vec
)
951 unsigned int inside_cost
= 0, prologue_cost
= 0;
952 struct data_reference
*first_dr
;
955 /* The SLP costs were already calculated during SLP tree build. */
956 if (PURE_SLP_STMT (stmt_info
))
959 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
960 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
961 stmt_info
, 0, vect_prologue
);
963 /* Grouped access? */
964 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
968 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
973 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
974 group_size
= vect_cost_group_size (stmt_info
);
977 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
979 /* Not a grouped access. */
983 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
986 /* We assume that the cost of a single store-lanes instruction is
987 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
988 access is instead being provided by a permute-and-store operation,
989 include the cost of the permutes. */
990 if (!store_lanes_p
&& group_size
> 1)
992 /* Uses a high and low interleave or shuffle operations for each
994 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
995 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
996 stmt_info
, 0, vect_body
);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE
, vect_location
,
1000 "vect_model_store_cost: strided group_size = %d .\n",
1004 /* Costs of the stores. */
1005 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
1007 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE
, vect_location
,
1009 "vect_model_store_cost: inside_cost = %d, "
1010 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1014 /* Calculate cost of DR's memory access. */
1016 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1017 unsigned int *inside_cost
,
1018 stmt_vector_for_cost
*body_cost_vec
)
1020 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1021 gimple stmt
= DR_STMT (dr
);
1022 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1024 switch (alignment_support_scheme
)
1028 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1029 vector_store
, stmt_info
, 0,
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE
, vect_location
,
1034 "vect_model_store_cost: aligned.\n");
1038 case dr_unaligned_supported
:
1040 /* Here, we assign an additional cost for the unaligned store. */
1041 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1042 unaligned_store
, stmt_info
,
1043 DR_MISALIGNMENT (dr
), vect_body
);
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE
, vect_location
,
1046 "vect_model_store_cost: unaligned supported by "
1051 case dr_unaligned_unsupported
:
1053 *inside_cost
= VECT_MAX_COST
;
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1057 "vect_model_store_cost: unsupported access.\n");
1067 /* Function vect_model_load_cost
1069 Models cost for loads. In the case of grouped accesses, the last access
1070 has the overhead of the grouped access attributed to it. Since unaligned
1071 accesses are supported for loads, we also account for the costs of the
1072 access scheme chosen. */
1075 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1076 bool load_lanes_p
, slp_tree slp_node
,
1077 stmt_vector_for_cost
*prologue_cost_vec
,
1078 stmt_vector_for_cost
*body_cost_vec
)
1082 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1083 unsigned int inside_cost
= 0, prologue_cost
= 0;
1085 /* The SLP costs were already calculated during SLP tree build. */
1086 if (PURE_SLP_STMT (stmt_info
))
1089 /* Grouped accesses? */
1090 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1091 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1093 group_size
= vect_cost_group_size (stmt_info
);
1094 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1096 /* Not a grouped access. */
1103 /* We assume that the cost of a single load-lanes instruction is
1104 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1105 access is instead being provided by a load-and-permute operation,
1106 include the cost of the permutes. */
1107 if (!load_lanes_p
&& group_size
> 1)
1109 /* Uses an even and odd extract operations or shuffle operations
1110 for each needed permute. */
1111 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1112 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1113 stmt_info
, 0, vect_body
);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE
, vect_location
,
1117 "vect_model_load_cost: strided group_size = %d .\n",
1121 /* The loads themselves. */
1122 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1124 /* N scalar loads plus gathering them into a vector. */
1125 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1126 inside_cost
+= record_stmt_cost (body_cost_vec
,
1127 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1128 scalar_load
, stmt_info
, 0, vect_body
);
1129 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1130 stmt_info
, 0, vect_body
);
1133 vect_get_load_cost (first_dr
, ncopies
,
1134 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1135 || group_size
> 1 || slp_node
),
1136 &inside_cost
, &prologue_cost
,
1137 prologue_cost_vec
, body_cost_vec
, true);
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE
, vect_location
,
1141 "vect_model_load_cost: inside_cost = %d, "
1142 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1146 /* Calculate cost of DR's memory access. */
1148 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1149 bool add_realign_cost
, unsigned int *inside_cost
,
1150 unsigned int *prologue_cost
,
1151 stmt_vector_for_cost
*prologue_cost_vec
,
1152 stmt_vector_for_cost
*body_cost_vec
,
1153 bool record_prologue_costs
)
1155 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1156 gimple stmt
= DR_STMT (dr
);
1157 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1159 switch (alignment_support_scheme
)
1163 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1164 stmt_info
, 0, vect_body
);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE
, vect_location
,
1168 "vect_model_load_cost: aligned.\n");
1172 case dr_unaligned_supported
:
1174 /* Here, we assign an additional cost for the unaligned load. */
1175 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1176 unaligned_load
, stmt_info
,
1177 DR_MISALIGNMENT (dr
), vect_body
);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE
, vect_location
,
1181 "vect_model_load_cost: unaligned supported by "
1186 case dr_explicit_realign
:
1188 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1189 vector_load
, stmt_info
, 0, vect_body
);
1190 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1191 vec_perm
, stmt_info
, 0, vect_body
);
1193 /* FIXME: If the misalignment remains fixed across the iterations of
1194 the containing loop, the following cost should be added to the
1196 if (targetm
.vectorize
.builtin_mask_for_load
)
1197 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1198 stmt_info
, 0, vect_body
);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE
, vect_location
,
1202 "vect_model_load_cost: explicit realign\n");
1206 case dr_explicit_realign_optimized
:
1208 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_NOTE
, vect_location
,
1210 "vect_model_load_cost: unaligned software "
1213 /* Unaligned software pipeline has a load of an address, an initial
1214 load, and possibly a mask operation to "prime" the loop. However,
1215 if this is an access in a group of loads, which provide grouped
1216 access, then the above cost should only be considered for one
1217 access in the group. Inside the loop, there is a load op
1218 and a realignment op. */
1220 if (add_realign_cost
&& record_prologue_costs
)
1222 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1223 vector_stmt
, stmt_info
,
1225 if (targetm
.vectorize
.builtin_mask_for_load
)
1226 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1227 vector_stmt
, stmt_info
,
1231 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1232 stmt_info
, 0, vect_body
);
1233 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1234 stmt_info
, 0, vect_body
);
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE
, vect_location
,
1238 "vect_model_load_cost: explicit realign optimized"
1244 case dr_unaligned_unsupported
:
1246 *inside_cost
= VECT_MAX_COST
;
1248 if (dump_enabled_p ())
1249 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1250 "vect_model_load_cost: unsupported access.\n");
1259 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1260 the loop preheader for the vectorized stmt STMT. */
1263 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1266 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1269 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1270 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1274 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1278 if (nested_in_vect_loop_p (loop
, stmt
))
1281 pe
= loop_preheader_edge (loop
);
1282 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1283 gcc_assert (!new_bb
);
1287 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1289 gimple_stmt_iterator gsi_bb_start
;
1291 gcc_assert (bb_vinfo
);
1292 bb
= BB_VINFO_BB (bb_vinfo
);
1293 gsi_bb_start
= gsi_after_labels (bb
);
1294 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1298 if (dump_enabled_p ())
1300 dump_printf_loc (MSG_NOTE
, vect_location
,
1301 "created new init_stmt: ");
1302 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1303 dump_printf (MSG_NOTE
, "\n");
1307 /* Function vect_init_vector.
1309 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1310 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1311 vector type a vector with all elements equal to VAL is created first.
1312 Place the initialization at BSI if it is not NULL. Otherwise, place the
1313 initialization at the loop preheader.
1314 Return the DEF of INIT_STMT.
1315 It will be used in the vectorization of STMT. */
1318 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1325 if (TREE_CODE (type
) == VECTOR_TYPE
1326 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1328 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1330 if (CONSTANT_CLASS_P (val
))
1331 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1334 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1335 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1338 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1342 val
= build_vector_from_val (type
, val
);
1345 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1346 init_stmt
= gimple_build_assign (new_var
, val
);
1347 new_temp
= make_ssa_name (new_var
, init_stmt
);
1348 gimple_assign_set_lhs (init_stmt
, new_temp
);
1349 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1350 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1355 /* Function vect_get_vec_def_for_operand.
1357 OP is an operand in STMT. This function returns a (vector) def that will be
1358 used in the vectorized stmt for STMT.
1360 In the case that OP is an SSA_NAME which is defined in the loop, then
1361 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1363 In case OP is an invariant or constant, a new stmt that creates a vector def
1364 needs to be introduced. */
1367 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1372 stmt_vec_info def_stmt_info
= NULL
;
1373 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1374 unsigned int nunits
;
1375 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1377 enum vect_def_type dt
;
1381 if (dump_enabled_p ())
1383 dump_printf_loc (MSG_NOTE
, vect_location
,
1384 "vect_get_vec_def_for_operand: ");
1385 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1386 dump_printf (MSG_NOTE
, "\n");
1389 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1390 &def_stmt
, &def
, &dt
);
1391 gcc_assert (is_simple_use
);
1392 if (dump_enabled_p ())
1394 int loc_printed
= 0;
1397 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1399 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1400 dump_printf (MSG_NOTE
, "\n");
1405 dump_printf (MSG_NOTE
, " def_stmt = ");
1407 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1408 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1409 dump_printf (MSG_NOTE
, "\n");
1415 /* Case 1: operand is a constant. */
1416 case vect_constant_def
:
1418 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1419 gcc_assert (vector_type
);
1420 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1425 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1426 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE
, vect_location
,
1428 "Create vector_cst. nunits = %d\n", nunits
);
1430 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1433 /* Case 2: operand is defined outside the loop - loop invariant. */
1434 case vect_external_def
:
1436 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1437 gcc_assert (vector_type
);
1442 /* Create 'vec_inv = {inv,inv,..,inv}' */
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1446 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1449 /* Case 3: operand is defined inside the loop. */
1450 case vect_internal_def
:
1453 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1455 /* Get the def from the vectorized stmt. */
1456 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1458 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1459 /* Get vectorized pattern statement. */
1461 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1462 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1463 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1464 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1465 gcc_assert (vec_stmt
);
1466 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1467 vec_oprnd
= PHI_RESULT (vec_stmt
);
1468 else if (is_gimple_call (vec_stmt
))
1469 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1471 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1475 /* Case 4: operand is defined by a loop header phi - reduction */
1476 case vect_reduction_def
:
1477 case vect_double_reduction_def
:
1478 case vect_nested_cycle
:
1482 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1483 loop
= (gimple_bb (def_stmt
))->loop_father
;
1485 /* Get the def before the loop */
1486 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1487 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1490 /* Case 5: operand is defined by loop-header phi - induction. */
1491 case vect_induction_def
:
1493 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1495 /* Get the def from the vectorized stmt. */
1496 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1497 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1498 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1499 vec_oprnd
= PHI_RESULT (vec_stmt
);
1501 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1511 /* Function vect_get_vec_def_for_stmt_copy
1513 Return a vector-def for an operand. This function is used when the
1514 vectorized stmt to be created (by the caller to this function) is a "copy"
1515 created in case the vectorized result cannot fit in one vector, and several
1516 copies of the vector-stmt are required. In this case the vector-def is
1517 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1518 of the stmt that defines VEC_OPRND.
1519 DT is the type of the vector def VEC_OPRND.
1522 In case the vectorization factor (VF) is bigger than the number
1523 of elements that can fit in a vectype (nunits), we have to generate
1524 more than one vector stmt to vectorize the scalar stmt. This situation
1525 arises when there are multiple data-types operated upon in the loop; the
1526 smallest data-type determines the VF, and as a result, when vectorizing
1527 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1528 vector stmt (each computing a vector of 'nunits' results, and together
1529 computing 'VF' results in each iteration). This function is called when
1530 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1531 which VF=16 and nunits=4, so the number of copies required is 4):
1533 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1535 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1536 VS1.1: vx.1 = memref1 VS1.2
1537 VS1.2: vx.2 = memref2 VS1.3
1538 VS1.3: vx.3 = memref3
1540 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1541 VSnew.1: vz1 = vx.1 + ... VSnew.2
1542 VSnew.2: vz2 = vx.2 + ... VSnew.3
1543 VSnew.3: vz3 = vx.3 + ...
1545 The vectorization of S1 is explained in vectorizable_load.
1546 The vectorization of S2:
1547 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1548 the function 'vect_get_vec_def_for_operand' is called to
1549 get the relevant vector-def for each operand of S2. For operand x it
1550 returns the vector-def 'vx.0'.
1552 To create the remaining copies of the vector-stmt (VSnew.j), this
1553 function is called to get the relevant vector-def for each operand. It is
1554 obtained from the respective VS1.j stmt, which is recorded in the
1555 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1557 For example, to obtain the vector-def 'vx.1' in order to create the
1558 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1559 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1560 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1561 and return its def ('vx.1').
1562 Overall, to create the above sequence this function will be called 3 times:
1563 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1564 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1565 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1568 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1570 gimple vec_stmt_for_operand
;
1571 stmt_vec_info def_stmt_info
;
1573 /* Do nothing; can reuse same def. */
1574 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1577 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1578 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1579 gcc_assert (def_stmt_info
);
1580 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1581 gcc_assert (vec_stmt_for_operand
);
1582 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1583 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1584 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1586 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1591 /* Get vectorized definitions for the operands to create a copy of an original
1592 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1595 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1596 vec
<tree
> *vec_oprnds0
,
1597 vec
<tree
> *vec_oprnds1
)
1599 tree vec_oprnd
= vec_oprnds0
->pop ();
1601 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1602 vec_oprnds0
->quick_push (vec_oprnd
);
1604 if (vec_oprnds1
&& vec_oprnds1
->length ())
1606 vec_oprnd
= vec_oprnds1
->pop ();
1607 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1608 vec_oprnds1
->quick_push (vec_oprnd
);
1613 /* Get vectorized definitions for OP0 and OP1.
1614 REDUC_INDEX is the index of reduction operand in case of reduction,
1615 and -1 otherwise. */
1618 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1619 vec
<tree
> *vec_oprnds0
,
1620 vec
<tree
> *vec_oprnds1
,
1621 slp_tree slp_node
, int reduc_index
)
1625 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1626 auto_vec
<tree
> ops (nops
);
1627 auto_vec
<vec
<tree
> > vec_defs (nops
);
1629 ops
.quick_push (op0
);
1631 ops
.quick_push (op1
);
1633 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1635 *vec_oprnds0
= vec_defs
[0];
1637 *vec_oprnds1
= vec_defs
[1];
1643 vec_oprnds0
->create (1);
1644 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1645 vec_oprnds0
->quick_push (vec_oprnd
);
1649 vec_oprnds1
->create (1);
1650 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1651 vec_oprnds1
->quick_push (vec_oprnd
);
1657 /* Function vect_finish_stmt_generation.
1659 Insert a new stmt. */
1662 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1663 gimple_stmt_iterator
*gsi
)
1665 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1666 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1667 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1669 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1671 if (!gsi_end_p (*gsi
)
1672 && gimple_has_mem_ops (vec_stmt
))
1674 gimple at_stmt
= gsi_stmt (*gsi
);
1675 tree vuse
= gimple_vuse (at_stmt
);
1676 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1678 tree vdef
= gimple_vdef (at_stmt
);
1679 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1680 /* If we have an SSA vuse and insert a store, update virtual
1681 SSA form to avoid triggering the renamer. Do so only
1682 if we can easily see all uses - which is what almost always
1683 happens with the way vectorized stmts are inserted. */
1684 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1685 && ((is_gimple_assign (vec_stmt
)
1686 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1687 || (is_gimple_call (vec_stmt
)
1688 && !(gimple_call_flags (vec_stmt
)
1689 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1691 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1692 gimple_set_vdef (vec_stmt
, new_vdef
);
1693 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1697 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1699 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1702 if (dump_enabled_p ())
1704 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1705 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1706 dump_printf (MSG_NOTE
, "\n");
1709 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1711 /* While EH edges will generally prevent vectorization, stmt might
1712 e.g. be in a must-not-throw region. Ensure newly created stmts
1713 that could throw are part of the same region. */
1714 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1715 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1716 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1719 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1720 a function declaration if the target has a vectorized version
1721 of the function, or NULL_TREE if the function cannot be vectorized. */
1724 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1726 tree fndecl
= gimple_call_fndecl (call
);
1728 /* We only handle functions that do not read or clobber memory -- i.e.
1729 const or novops ones. */
1730 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1734 || TREE_CODE (fndecl
) != FUNCTION_DECL
1735 || !DECL_BUILT_IN (fndecl
))
1738 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1743 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1744 gimple_stmt_iterator
*);
1747 /* Function vectorizable_mask_load_store.
1749 Check if STMT performs a conditional load or store that can be vectorized.
1750 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1751 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1752 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1755 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1756 gimple
*vec_stmt
, slp_tree slp_node
)
1758 tree vec_dest
= NULL
;
1759 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1760 stmt_vec_info prev_stmt_info
;
1761 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1762 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1763 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1764 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1765 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1769 tree dataref_ptr
= NULL_TREE
;
1771 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1775 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1776 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1777 int gather_scale
= 1;
1778 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1783 enum vect_def_type dt
;
1785 if (slp_node
!= NULL
)
1788 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1789 gcc_assert (ncopies
>= 1);
1791 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1792 mask
= gimple_call_arg (stmt
, 2);
1793 if (TYPE_PRECISION (TREE_TYPE (mask
))
1794 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1797 /* FORNOW. This restriction should be relaxed. */
1798 if (nested_in_vect_loop
&& ncopies
> 1)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1802 "multiple types in nested loop.");
1806 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1809 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1812 if (!STMT_VINFO_DATA_REF (stmt_info
))
1815 elem_type
= TREE_TYPE (vectype
);
1817 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1820 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1823 if (STMT_VINFO_GATHER_P (stmt_info
))
1827 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1828 &gather_off
, &gather_scale
);
1829 gcc_assert (gather_decl
);
1830 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1831 &def_stmt
, &def
, &gather_dt
,
1832 &gather_off_vectype
))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1836 "gather index use not simple.");
1840 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1842 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1843 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1847 "masked gather with integer mask not supported.");
1851 else if (tree_int_cst_compare (nested_in_vect_loop
1852 ? STMT_VINFO_DR_STEP (stmt_info
)
1853 : DR_STEP (dr
), size_zero_node
) <= 0)
1855 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1856 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1859 if (TREE_CODE (mask
) != SSA_NAME
)
1862 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1863 &def_stmt
, &def
, &dt
))
1868 tree rhs
= gimple_call_arg (stmt
, 3);
1869 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1870 &def_stmt
, &def
, &dt
))
1874 if (!vec_stmt
) /* transformation not required. */
1876 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1878 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1881 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1887 if (STMT_VINFO_GATHER_P (stmt_info
))
1889 tree vec_oprnd0
= NULL_TREE
, op
;
1890 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1891 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1892 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1893 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1894 tree mask_perm_mask
= NULL_TREE
;
1895 edge pe
= loop_preheader_edge (loop
);
1898 enum { NARROW
, NONE
, WIDEN
} modifier
;
1899 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1901 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1902 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1903 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1904 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1905 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1906 scaletype
= TREE_VALUE (arglist
);
1907 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1908 && types_compatible_p (srctype
, masktype
));
1910 if (nunits
== gather_off_nunits
)
1912 else if (nunits
== gather_off_nunits
/ 2)
1914 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1917 for (i
= 0; i
< gather_off_nunits
; ++i
)
1918 sel
[i
] = i
| nunits
;
1920 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
1921 gcc_assert (perm_mask
!= NULL_TREE
);
1923 else if (nunits
== gather_off_nunits
* 2)
1925 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1928 for (i
= 0; i
< nunits
; ++i
)
1929 sel
[i
] = i
< gather_off_nunits
1930 ? i
: i
+ nunits
- gather_off_nunits
;
1932 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
1933 gcc_assert (perm_mask
!= NULL_TREE
);
1935 for (i
= 0; i
< nunits
; ++i
)
1936 sel
[i
] = i
| gather_off_nunits
;
1937 mask_perm_mask
= vect_gen_perm_mask (masktype
, sel
);
1938 gcc_assert (mask_perm_mask
!= NULL_TREE
);
1943 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1945 ptr
= fold_convert (ptrtype
, gather_base
);
1946 if (!is_gimple_min_invariant (ptr
))
1948 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1949 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1950 gcc_assert (!new_bb
);
1953 scale
= build_int_cst (scaletype
, gather_scale
);
1955 prev_stmt_info
= NULL
;
1956 for (j
= 0; j
< ncopies
; ++j
)
1958 if (modifier
== WIDEN
&& (j
& 1))
1959 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1960 perm_mask
, stmt
, gsi
);
1963 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1966 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1968 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1970 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1971 == TYPE_VECTOR_SUBPARTS (idxtype
));
1972 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1973 var
= make_ssa_name (var
, NULL
);
1974 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1976 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
1978 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1982 if (mask_perm_mask
&& (j
& 1))
1983 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1984 mask_perm_mask
, stmt
, gsi
);
1988 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
1991 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
1992 &def_stmt
, &def
, &dt
);
1993 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1997 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1999 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2000 == TYPE_VECTOR_SUBPARTS (masktype
));
2001 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
2003 var
= make_ssa_name (var
, NULL
);
2004 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2006 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
2007 mask_op
, NULL_TREE
);
2008 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2014 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
2017 if (!useless_type_conversion_p (vectype
, rettype
))
2019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2020 == TYPE_VECTOR_SUBPARTS (rettype
));
2021 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2022 op
= make_ssa_name (var
, new_stmt
);
2023 gimple_call_set_lhs (new_stmt
, op
);
2024 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2025 var
= make_ssa_name (vec_dest
, NULL
);
2026 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2028 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
2033 var
= make_ssa_name (vec_dest
, new_stmt
);
2034 gimple_call_set_lhs (new_stmt
, var
);
2037 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2039 if (modifier
== NARROW
)
2046 var
= permute_vec_elements (prev_res
, var
,
2047 perm_mask
, stmt
, gsi
);
2048 new_stmt
= SSA_NAME_DEF_STMT (var
);
2051 if (prev_stmt_info
== NULL
)
2052 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2054 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2055 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2058 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2060 tree lhs
= gimple_call_lhs (stmt
);
2061 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2062 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2063 set_vinfo_for_stmt (stmt
, NULL
);
2064 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2065 gsi_replace (gsi
, new_stmt
, true);
2070 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2071 prev_stmt_info
= NULL
;
2072 for (i
= 0; i
< ncopies
; i
++)
2074 unsigned align
, misalign
;
2078 tree rhs
= gimple_call_arg (stmt
, 3);
2079 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2080 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2081 /* We should have catched mismatched types earlier. */
2082 gcc_assert (useless_type_conversion_p (vectype
,
2083 TREE_TYPE (vec_rhs
)));
2084 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2085 NULL_TREE
, &dummy
, gsi
,
2086 &ptr_incr
, false, &inv_p
);
2087 gcc_assert (!inv_p
);
2091 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2093 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2094 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2096 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2097 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2098 TYPE_SIZE_UNIT (vectype
));
2101 align
= TYPE_ALIGN_UNIT (vectype
);
2102 if (aligned_access_p (dr
))
2104 else if (DR_MISALIGNMENT (dr
) == -1)
2106 align
= TYPE_ALIGN_UNIT (elem_type
);
2110 misalign
= DR_MISALIGNMENT (dr
);
2111 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2114 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2115 gimple_call_arg (stmt
, 1),
2117 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2119 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2121 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2122 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2127 tree vec_mask
= NULL_TREE
;
2128 prev_stmt_info
= NULL
;
2129 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2130 for (i
= 0; i
< ncopies
; i
++)
2132 unsigned align
, misalign
;
2136 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2137 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2138 NULL_TREE
, &dummy
, gsi
,
2139 &ptr_incr
, false, &inv_p
);
2140 gcc_assert (!inv_p
);
2144 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2146 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2147 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2148 TYPE_SIZE_UNIT (vectype
));
2151 align
= TYPE_ALIGN_UNIT (vectype
);
2152 if (aligned_access_p (dr
))
2154 else if (DR_MISALIGNMENT (dr
) == -1)
2156 align
= TYPE_ALIGN_UNIT (elem_type
);
2160 misalign
= DR_MISALIGNMENT (dr
);
2161 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2164 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2165 gimple_call_arg (stmt
, 1),
2167 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
, NULL
));
2168 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2170 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2172 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2173 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2179 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2181 tree lhs
= gimple_call_lhs (stmt
);
2182 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2183 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2184 set_vinfo_for_stmt (stmt
, NULL
);
2185 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2186 gsi_replace (gsi
, new_stmt
, true);
2193 /* Function vectorizable_call.
2195 Check if STMT performs a function call that can be vectorized.
2196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2197 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2201 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2207 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2208 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2209 tree vectype_out
, vectype_in
;
2212 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2213 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2214 tree fndecl
, new_temp
, def
, rhs_type
;
2216 enum vect_def_type dt
[3]
2217 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2218 gimple new_stmt
= NULL
;
2220 vec
<tree
> vargs
= vNULL
;
2221 enum { NARROW
, NONE
, WIDEN
} modifier
;
2225 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2228 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2231 /* Is STMT a vectorizable call? */
2232 if (!is_gimple_call (stmt
))
2235 if (gimple_call_internal_p (stmt
)
2236 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2237 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2238 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2241 if (gimple_call_lhs (stmt
) == NULL_TREE
2242 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2245 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2247 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2249 /* Process function arguments. */
2250 rhs_type
= NULL_TREE
;
2251 vectype_in
= NULL_TREE
;
2252 nargs
= gimple_call_num_args (stmt
);
2254 /* Bail out if the function has more than three arguments, we do not have
2255 interesting builtin functions to vectorize with more than two arguments
2256 except for fma. No arguments is also not good. */
2257 if (nargs
== 0 || nargs
> 3)
2260 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2261 if (gimple_call_internal_p (stmt
)
2262 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2265 rhs_type
= unsigned_type_node
;
2268 for (i
= 0; i
< nargs
; i
++)
2272 op
= gimple_call_arg (stmt
, i
);
2274 /* We can only handle calls with arguments of the same type. */
2276 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2280 "argument types differ.\n");
2284 rhs_type
= TREE_TYPE (op
);
2286 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2287 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2291 "use not simple.\n");
2296 vectype_in
= opvectype
;
2298 && opvectype
!= vectype_in
)
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2302 "argument vector types differ.\n");
2306 /* If all arguments are external or constant defs use a vector type with
2307 the same size as the output vector type. */
2309 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2311 gcc_assert (vectype_in
);
2314 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2317 "no vectype for scalar type ");
2318 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2319 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2326 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2327 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2328 if (nunits_in
== nunits_out
/ 2)
2330 else if (nunits_out
== nunits_in
)
2332 else if (nunits_out
== nunits_in
/ 2)
2337 /* For now, we only vectorize functions if a target specific builtin
2338 is available. TODO -- in some cases, it might be profitable to
2339 insert the calls for pieces of the vector, in order to be able
2340 to vectorize other operations in the loop. */
2341 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2342 if (fndecl
== NULL_TREE
)
2344 if (gimple_call_internal_p (stmt
)
2345 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2348 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2349 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2350 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2351 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2353 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2354 { 0, 1, 2, ... vf - 1 } vector. */
2355 gcc_assert (nargs
== 0);
2359 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2361 "function is not vectorizable.\n");
2366 gcc_assert (!gimple_vuse (stmt
));
2368 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2370 else if (modifier
== NARROW
)
2371 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2373 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2375 /* Sanity check: make sure that at least one copy of the vectorized stmt
2376 needs to be generated. */
2377 gcc_assert (ncopies
>= 1);
2379 if (!vec_stmt
) /* transformation not required. */
2381 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2385 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2395 scalar_dest
= gimple_call_lhs (stmt
);
2396 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2398 prev_stmt_info
= NULL
;
2402 for (j
= 0; j
< ncopies
; ++j
)
2404 /* Build argument list for the vectorized call. */
2406 vargs
.create (nargs
);
2412 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2413 vec
<tree
> vec_oprnds0
;
2415 for (i
= 0; i
< nargs
; i
++)
2416 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2417 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2418 vec_oprnds0
= vec_defs
[0];
2420 /* Arguments are ready. Create the new vector stmt. */
2421 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2424 for (k
= 0; k
< nargs
; k
++)
2426 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2427 vargs
[k
] = vec_oprndsk
[i
];
2429 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2430 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2431 gimple_call_set_lhs (new_stmt
, new_temp
);
2432 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2433 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2436 for (i
= 0; i
< nargs
; i
++)
2438 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2439 vec_oprndsi
.release ();
2444 for (i
= 0; i
< nargs
; i
++)
2446 op
= gimple_call_arg (stmt
, i
);
2449 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2452 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2454 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2457 vargs
.quick_push (vec_oprnd0
);
2460 if (gimple_call_internal_p (stmt
)
2461 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2463 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2465 for (k
= 0; k
< nunits_out
; ++k
)
2466 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2467 tree cst
= build_vector (vectype_out
, v
);
2469 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2470 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2471 new_temp
= make_ssa_name (new_var
, init_stmt
);
2472 gimple_assign_set_lhs (init_stmt
, new_temp
);
2473 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2474 new_temp
= make_ssa_name (vec_dest
, NULL
);
2475 new_stmt
= gimple_build_assign (new_temp
,
2476 gimple_assign_lhs (init_stmt
));
2480 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2481 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2482 gimple_call_set_lhs (new_stmt
, new_temp
);
2484 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2487 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2489 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2491 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2497 for (j
= 0; j
< ncopies
; ++j
)
2499 /* Build argument list for the vectorized call. */
2501 vargs
.create (nargs
* 2);
2507 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2508 vec
<tree
> vec_oprnds0
;
2510 for (i
= 0; i
< nargs
; i
++)
2511 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2512 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2513 vec_oprnds0
= vec_defs
[0];
2515 /* Arguments are ready. Create the new vector stmt. */
2516 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2520 for (k
= 0; k
< nargs
; k
++)
2522 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2523 vargs
.quick_push (vec_oprndsk
[i
]);
2524 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2526 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2527 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2528 gimple_call_set_lhs (new_stmt
, new_temp
);
2529 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2530 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2533 for (i
= 0; i
< nargs
; i
++)
2535 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2536 vec_oprndsi
.release ();
2541 for (i
= 0; i
< nargs
; i
++)
2543 op
= gimple_call_arg (stmt
, i
);
2547 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2549 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2553 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2555 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2557 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2560 vargs
.quick_push (vec_oprnd0
);
2561 vargs
.quick_push (vec_oprnd1
);
2564 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2565 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2566 gimple_call_set_lhs (new_stmt
, new_temp
);
2567 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2570 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2574 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2577 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2582 /* No current target implements this case. */
2588 /* The call in STMT might prevent it from being removed in dce.
2589 We however cannot remove it here, due to the way the ssa name
2590 it defines is mapped to the new definition. So just replace
2591 rhs of the statement with something harmless. */
2596 type
= TREE_TYPE (scalar_dest
);
2597 if (is_pattern_stmt_p (stmt_info
))
2598 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2600 lhs
= gimple_call_lhs (stmt
);
2601 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2602 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2603 set_vinfo_for_stmt (stmt
, NULL
);
2604 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2605 gsi_replace (gsi
, new_stmt
, false);
/* NOTE(review): extraction-damaged fragment of GCC's tree-vect-stmts.c.
   The numbers embedded at the start of lines are the ORIGINAL file's line
   numbers; they jump here (2611 -> 2615 -> 2616), so several struct
   members and the closing brace are missing from this view.  Code further
   down references .op, .align and .vectype members that are not visible
   here -- restore this region from upstream before making any code
   change.  */
2611 struct simd_call_arg_info
/* Definition kind of this call argument; filled in via
   vect_is_simple_use_1 in vectorizable_simd_clone_call below.  */
2615 enum vect_def_type dt
;
/* Step of a linear argument.  Initialized to 0 below and set from
   iv.step when the argument is a simple induction -- presumably 0 means
   "not linear"; confirm against upstream sources.  */
2616 HOST_WIDE_INT linear_step
;
2620 /* Function vectorizable_simd_clone_call.
2622 Check if STMT performs a function call that can be vectorized
2623 by calling a simd clone of the function.
2624 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2625 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2626 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2629 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2630 gimple
*vec_stmt
, slp_tree slp_node
)
2635 tree vec_oprnd0
= NULL_TREE
;
2636 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2638 unsigned int nunits
;
2639 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2640 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2641 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2642 tree fndecl
, new_temp
, def
;
2644 gimple new_stmt
= NULL
;
2646 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2647 vec
<tree
> vargs
= vNULL
;
2649 tree lhs
, rtype
, ratype
;
2650 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2652 /* Is STMT a vectorizable call? */
2653 if (!is_gimple_call (stmt
))
2656 fndecl
= gimple_call_fndecl (stmt
);
2657 if (fndecl
== NULL_TREE
)
2660 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2661 if (node
== NULL
|| node
->simd_clones
== NULL
)
2664 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2667 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2670 if (gimple_call_lhs (stmt
)
2671 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2674 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2676 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2678 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2682 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2685 /* Process function arguments. */
2686 nargs
= gimple_call_num_args (stmt
);
2688 /* Bail out if the function has zero arguments. */
2692 arginfo
.create (nargs
);
2694 for (i
= 0; i
< nargs
; i
++)
2696 simd_call_arg_info thisarginfo
;
2699 thisarginfo
.linear_step
= 0;
2700 thisarginfo
.align
= 0;
2701 thisarginfo
.op
= NULL_TREE
;
2703 op
= gimple_call_arg (stmt
, i
);
2704 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2705 &def_stmt
, &def
, &thisarginfo
.dt
,
2706 &thisarginfo
.vectype
)
2707 || thisarginfo
.dt
== vect_uninitialized_def
)
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2711 "use not simple.\n");
2716 if (thisarginfo
.dt
== vect_constant_def
2717 || thisarginfo
.dt
== vect_external_def
)
2718 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2720 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2722 if (thisarginfo
.dt
!= vect_constant_def
2723 && thisarginfo
.dt
!= vect_external_def
2725 && TREE_CODE (op
) == SSA_NAME
2726 && simple_iv (loop
, loop_containing_stmt (stmt
), op
, &iv
, false)
2727 && tree_fits_shwi_p (iv
.step
))
2729 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2730 thisarginfo
.op
= iv
.base
;
2732 else if ((thisarginfo
.dt
== vect_constant_def
2733 || thisarginfo
.dt
== vect_external_def
)
2734 && POINTER_TYPE_P (TREE_TYPE (op
)))
2735 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2737 arginfo
.quick_push (thisarginfo
);
2740 unsigned int badness
= 0;
2741 struct cgraph_node
*bestn
= NULL
;
2742 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
))
2743 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
));
2745 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2746 n
= n
->simdclone
->next_clone
)
2748 unsigned int this_badness
= 0;
2749 if (n
->simdclone
->simdlen
2750 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2751 || n
->simdclone
->nargs
!= nargs
)
2753 if (n
->simdclone
->simdlen
2754 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2755 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2756 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2757 if (n
->simdclone
->inbranch
)
2758 this_badness
+= 2048;
2759 int target_badness
= targetm
.simd_clone
.usable (n
);
2760 if (target_badness
< 0)
2762 this_badness
+= target_badness
* 512;
2763 /* FORNOW: Have to add code to add the mask argument. */
2764 if (n
->simdclone
->inbranch
)
2766 for (i
= 0; i
< nargs
; i
++)
2768 switch (n
->simdclone
->args
[i
].arg_type
)
2770 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2771 if (!useless_type_conversion_p
2772 (n
->simdclone
->args
[i
].orig_type
,
2773 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2775 else if (arginfo
[i
].dt
== vect_constant_def
2776 || arginfo
[i
].dt
== vect_external_def
2777 || arginfo
[i
].linear_step
)
2780 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2781 if (arginfo
[i
].dt
!= vect_constant_def
2782 && arginfo
[i
].dt
!= vect_external_def
)
2785 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2786 if (arginfo
[i
].dt
== vect_constant_def
2787 || arginfo
[i
].dt
== vect_external_def
2788 || (arginfo
[i
].linear_step
2789 != n
->simdclone
->args
[i
].linear_step
))
2792 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2796 case SIMD_CLONE_ARG_TYPE_MASK
:
2799 if (i
== (size_t) -1)
2801 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2806 if (arginfo
[i
].align
)
2807 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2808 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2810 if (i
== (size_t) -1)
2812 if (bestn
== NULL
|| this_badness
< badness
)
2815 badness
= this_badness
;
2825 for (i
= 0; i
< nargs
; i
++)
2826 if ((arginfo
[i
].dt
== vect_constant_def
2827 || arginfo
[i
].dt
== vect_external_def
)
2828 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2831 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2833 if (arginfo
[i
].vectype
== NULL
2834 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2835 > bestn
->simdclone
->simdlen
))
2842 fndecl
= bestn
->decl
;
2843 nunits
= bestn
->simdclone
->simdlen
;
2844 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2846 /* If the function isn't const, only allow it in simd loops where user
2847 has asserted that at least nunits consecutive iterations can be
2848 performed using SIMD instructions. */
2849 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2850 && gimple_vuse (stmt
))
2856 /* Sanity check: make sure that at least one copy of the vectorized stmt
2857 needs to be generated. */
2858 gcc_assert (ncopies
>= 1);
2860 if (!vec_stmt
) /* transformation not required. */
2862 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
) = bestn
->decl
;
2863 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2864 if (dump_enabled_p ())
2865 dump_printf_loc (MSG_NOTE
, vect_location
,
2866 "=== vectorizable_simd_clone_call ===\n");
2867 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2874 if (dump_enabled_p ())
2875 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2878 scalar_dest
= gimple_call_lhs (stmt
);
2879 vec_dest
= NULL_TREE
;
2884 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2885 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2886 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2889 rtype
= TREE_TYPE (ratype
);
2893 prev_stmt_info
= NULL
;
2894 for (j
= 0; j
< ncopies
; ++j
)
2896 /* Build argument list for the vectorized call. */
2898 vargs
.create (nargs
);
2902 for (i
= 0; i
< nargs
; i
++)
2904 unsigned int k
, l
, m
, o
;
2906 op
= gimple_call_arg (stmt
, i
);
2907 switch (bestn
->simdclone
->args
[i
].arg_type
)
2909 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2910 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2911 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2912 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2914 if (TYPE_VECTOR_SUBPARTS (atype
)
2915 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2917 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2918 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2919 / TYPE_VECTOR_SUBPARTS (atype
));
2920 gcc_assert ((k
& (k
- 1)) == 0);
2923 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2926 vec_oprnd0
= arginfo
[i
].op
;
2927 if ((m
& (k
- 1)) == 0)
2929 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2932 arginfo
[i
].op
= vec_oprnd0
;
2934 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2936 bitsize_int ((m
& (k
- 1)) * prec
));
2938 = gimple_build_assign (make_ssa_name (atype
, NULL
),
2940 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2941 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2945 k
= (TYPE_VECTOR_SUBPARTS (atype
)
2946 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
2947 gcc_assert ((k
& (k
- 1)) == 0);
2948 vec
<constructor_elt
, va_gc
> *ctor_elts
;
2950 vec_alloc (ctor_elts
, k
);
2953 for (l
= 0; l
< k
; l
++)
2955 if (m
== 0 && l
== 0)
2957 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2960 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2962 arginfo
[i
].op
= vec_oprnd0
;
2965 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
2969 vargs
.safe_push (vec_oprnd0
);
2972 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
2974 = gimple_build_assign (make_ssa_name (atype
, NULL
),
2976 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2977 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2982 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2983 vargs
.safe_push (op
);
2985 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2990 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
2995 edge pe
= loop_preheader_edge (loop
);
2996 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2997 gcc_assert (!new_bb
);
2999 tree phi_res
= copy_ssa_name (op
, NULL
);
3000 gimple new_phi
= create_phi_node (phi_res
, loop
->header
);
3001 set_vinfo_for_stmt (new_phi
,
3002 new_stmt_vec_info (new_phi
, loop_vinfo
,
3004 add_phi_arg (new_phi
, arginfo
[i
].op
,
3005 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3007 = POINTER_TYPE_P (TREE_TYPE (op
))
3008 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3009 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3010 ? sizetype
: TREE_TYPE (op
);
3012 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3014 tree tcst
= wide_int_to_tree (type
, cst
);
3015 tree phi_arg
= copy_ssa_name (op
, NULL
);
3016 new_stmt
= gimple_build_assign_with_ops (code
, phi_arg
,
3018 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3019 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3020 set_vinfo_for_stmt (new_stmt
,
3021 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3023 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3025 arginfo
[i
].op
= phi_res
;
3026 vargs
.safe_push (phi_res
);
3031 = POINTER_TYPE_P (TREE_TYPE (op
))
3032 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3033 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3034 ? sizetype
: TREE_TYPE (op
);
3036 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3038 tree tcst
= wide_int_to_tree (type
, cst
);
3039 new_temp
= make_ssa_name (TREE_TYPE (op
), NULL
);
3041 = gimple_build_assign_with_ops (code
, new_temp
,
3042 arginfo
[i
].op
, tcst
);
3043 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3044 vargs
.safe_push (new_temp
);
3047 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3053 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3056 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3058 new_temp
= create_tmp_var (ratype
, NULL
);
3059 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3060 == TYPE_VECTOR_SUBPARTS (rtype
))
3061 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3063 new_temp
= make_ssa_name (rtype
, new_stmt
);
3064 gimple_call_set_lhs (new_stmt
, new_temp
);
3066 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3070 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3073 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3074 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3075 gcc_assert ((k
& (k
- 1)) == 0);
3076 for (l
= 0; l
< k
; l
++)
3081 t
= build_fold_addr_expr (new_temp
);
3082 t
= build2 (MEM_REF
, vectype
, t
,
3083 build_int_cst (TREE_TYPE (t
),
3084 l
* prec
/ BITS_PER_UNIT
));
3087 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3088 size_int (prec
), bitsize_int (l
* prec
));
3090 = gimple_build_assign (make_ssa_name (vectype
, NULL
), t
);
3091 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3092 if (j
== 0 && l
== 0)
3093 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3095 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3097 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3102 tree clobber
= build_constructor (ratype
, NULL
);
3103 TREE_THIS_VOLATILE (clobber
) = 1;
3104 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3105 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3109 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3111 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3112 / TYPE_VECTOR_SUBPARTS (rtype
));
3113 gcc_assert ((k
& (k
- 1)) == 0);
3114 if ((j
& (k
- 1)) == 0)
3115 vec_alloc (ret_ctor_elts
, k
);
3118 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3119 for (m
= 0; m
< o
; m
++)
3121 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3122 size_int (m
), NULL_TREE
, NULL_TREE
);
3124 = gimple_build_assign (make_ssa_name (rtype
, NULL
),
3126 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3127 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3128 gimple_assign_lhs (new_stmt
));
3130 tree clobber
= build_constructor (ratype
, NULL
);
3131 TREE_THIS_VOLATILE (clobber
) = 1;
3132 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3133 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3136 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3137 if ((j
& (k
- 1)) != k
- 1)
3139 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3141 = gimple_build_assign (make_ssa_name (vec_dest
, NULL
),
3143 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3145 if ((unsigned) j
== k
- 1)
3146 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3150 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3155 tree t
= build_fold_addr_expr (new_temp
);
3156 t
= build2 (MEM_REF
, vectype
, t
,
3157 build_int_cst (TREE_TYPE (t
), 0));
3159 = gimple_build_assign (make_ssa_name (vec_dest
, NULL
), t
);
3160 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3161 tree clobber
= build_constructor (ratype
, NULL
);
3162 TREE_THIS_VOLATILE (clobber
) = 1;
3163 vect_finish_stmt_generation (stmt
,
3164 gimple_build_assign (new_temp
,
3170 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3172 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3174 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3179 /* The call in STMT might prevent it from being removed in dce.
3180 We however cannot remove it here, due to the way the ssa name
3181 it defines is mapped to the new definition. So just replace
3182 rhs of the statement with something harmless. */
3189 type
= TREE_TYPE (scalar_dest
);
3190 if (is_pattern_stmt_p (stmt_info
))
3191 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3193 lhs
= gimple_call_lhs (stmt
);
3194 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3197 new_stmt
= gimple_build_nop ();
3198 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3199 set_vinfo_for_stmt (stmt
, NULL
);
3200 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3201 gsi_replace (gsi
, new_stmt
, false);
3202 unlink_stmt_vdef (stmt
);
/* NOTE(review): extraction-damaged fragment -- the embedded original line
   numbers jump (e.g. 3215 -> 3218, 3221 -> 3227, 3249 -> 3255), so the
   remaining parameters, the surrounding braces and the trailing
   "return new_stmt;" / closing brace are missing from this view.  The
   annotations below describe only what the visible lines show; restore
   from upstream before editing the code itself.  */
3208 /* Function vect_gen_widened_results_half
3210 Create a vector stmt whose code, type, number of arguments, and result
3211 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3212 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3213 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3214 needs to be created (DECL is a function-decl of a target-builtin).
3215 STMT is the original scalar stmt that we are vectorizing. */
3218 vect_gen_widened_results_half (enum tree_code code
,
3220 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3221 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3227 /* Generate half of the widened result: */
/* Target-builtin path: emit a GIMPLE call to DECL with one or two
   operands depending on OP_TYPE, and give it a fresh SSA lhs.  */
3228 if (code
== CALL_EXPR
)
3230 /* Target specific support */
3231 if (op_type
== binary_op
)
3232 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3234 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3235 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3236 gimple_call_set_lhs (new_stmt
, new_temp
);
3240 /* Generic support */
/* Non-call path: build an assignment with tree code CODE.  The assert
   ties OP_TYPE to the arity of CODE.  NOTE(review): the else-branch line
   between 3242 and 3244 is missing here -- presumably it nulls or
   supplies vec_oprnd1 for the unary case; confirm upstream.  */
3241 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3242 if (op_type
!= binary_op
)
3244 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
3246 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3247 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Insert the new statement at GSI in the vectorized code.  */
3249 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NOTE(review): extraction-damaged fragment -- original line numbers jump
   (3260 -> 3263, 3284 -> 3287, and the closing brace after 3287 is
   absent), so the guard that terminates the tail recursion below is not
   visible in this view.  Restore from upstream before editing.  */
3255 /* Get vectorized definitions for loop-based vectorization. For the first
3256 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3257 scalar operand), and for the rest we get a copy with
3258 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3259 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3260 The vectors are collected into VEC_OPRNDS. */
3263 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3264 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3268 /* Get first vector operand. */
3269 /* All the vector operands except the very first one (that is scalar oprnd)
/* First operand: if *OPRND is still the scalar operand, create its
   initial vector def; otherwise copy the previous vector def.  */
3271 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3272 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3274 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3276 vec_oprnds
->quick_push (vec_oprnd
);
3278 /* Get second vector operand. */
3279 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3280 vec_oprnds
->quick_push (vec_oprnd
);
3284 /* For conversion in multiple steps, continue to get operands
/* Tail recursion for multi-step conversions.  NOTE(review): the
   condition guarding this recursive call (presumably
   "if (multi_step_cvt)") is among the missing lines -- confirm upstream
   before assuming unconditional recursion.  */
3287 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
/* NOTE(review): extraction-damaged fragment -- original line numbers jump
   throughout (3292 -> 3296, 3318 -> 3321, 3337 -> 3342, 3353 -> 3357),
   so loop braces, the SLP/loop dispatch guards, the multi-step guard and
   the closing brace are missing from this view.  Comments below describe
   only the visible statements; restore from upstream before editing.  */
3291 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3292 For multi-step conversions store the resulting vectors and call the function
3296 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3297 int multi_step_cvt
, gimple stmt
,
3299 gimple_stmt_iterator
*gsi
,
3300 slp_tree slp_node
, enum tree_code code
,
3301 stmt_vec_info
*prev_stmt_info
)
3304 tree vop0
, vop1
, new_tmp
, vec_dest
;
3306 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Destination vector type for this demotion level, popped from the
   stack of per-level destinations.  */
3308 vec_dest
= vec_dsts
.pop ();
/* Consume operands pairwise: each demotion packs two source vectors
   into one narrower result.  */
3310 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3312 /* Create demotion operation. */
3313 vop0
= (*vec_oprnds
)[i
];
3314 vop1
= (*vec_oprnds
)[i
+ 1];
3315 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3316 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3317 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3318 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3321 /* Store the resulting vector for next recursive call. */
/* Overwrite the input slot in place: slot i/2 holds the packed result
   for the next, narrower level.  */
3322 (*vec_oprnds
)[i
/2] = new_tmp
;
3325 /* This is the last step of the conversion sequence. Store the
3326 vectors in SLP_NODE or in vector info of the scalar statement
3327 (or in STMT_VINFO_RELATED_STMT chain). */
/* NOTE(review): the "if (slp_node) ... else ..." guards selecting
   between the two branches below are among the missing lines.  */
3329 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3332 if (!*prev_stmt_info
)
3333 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3335 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3337 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3342 /* For multi-step demotion operations we first generate demotion operations
3343 from the source type to the intermediate types, and then combine the
3344 results (stored in VEC_OPRNDS) in demotion operation to the destination
3348 /* At each level of recursion we have half of the operands we had at the
/* Recurse on the halved operand set with one fewer conversion step.
   NOTE(review): the "if (multi_step_cvt)" guard is presumably among the
   missing lines -- confirm upstream.  */
3350 vec_oprnds
->truncate ((i
+1)/2);
3351 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3352 stmt
, vec_dsts
, gsi
, slp_node
,
3353 VEC_PACK_TRUNC_EXPR
,
/* Restore the popped destination so the caller's stack is unchanged.  */
3357 vec_dsts
.quick_push (vec_dest
);
/* NOTE(review): extraction-damaged fragment -- original line numbers jump
   (3363 -> 3366, 3383 -> 3387, 3395 -> 3399, 3405 -> 3408) and the
   function's closing brace after 3409 is absent, so the unary-op
   else-branch for vop1 and several braces are missing from this view.
   Restore from upstream before editing the code itself.  */
3361 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3362 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3363 the resulting vectors and call the function recursively. */
3366 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3367 vec
<tree
> *vec_oprnds1
,
3368 gimple stmt
, tree vec_dest
,
3369 gimple_stmt_iterator
*gsi
,
3370 enum tree_code code1
,
3371 enum tree_code code2
, tree decl1
,
3372 tree decl2
, int op_type
)
3375 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3376 gimple new_stmt1
, new_stmt2
;
3377 vec
<tree
> vec_tmp
= vNULL
;
/* Each source vector produces two widened result vectors (low/high
   half), hence twice the input length.  */
3379 vec_tmp
.create (vec_oprnds0
->length () * 2);
3380 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3382 if (op_type
== binary_op
)
3383 vop1
= (*vec_oprnds1
)[i
];
3387 /* Generate the two halves of promotion operation. */
3388 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3389 op_type
, vec_dest
, gsi
, stmt
);
3390 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3391 op_type
, vec_dest
, gsi
, stmt
);
/* The halves may be either target-builtin calls or plain assignments;
   fetch the result SSA name accordingly.  */
3392 if (is_gimple_call (new_stmt1
))
3394 new_tmp1
= gimple_call_lhs (new_stmt1
);
3395 new_tmp2
= gimple_call_lhs (new_stmt2
);
3399 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3400 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3403 /* Store the results for the next step. */
3404 vec_tmp
.quick_push (new_tmp1
);
3405 vec_tmp
.quick_push (new_tmp2
);
/* Hand the widened results back to the caller through VEC_OPRNDS0,
   releasing the consumed input vector.  */
3408 vec_oprnds0
->release ();
3409 *vec_oprnds0
= vec_tmp
;
3413 /* Check if STMT performs a conversion operation, that can be vectorized.
3414 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3415 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3416 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3419 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3420 gimple
*vec_stmt
, slp_tree slp_node
)
3424 tree op0
, op1
= NULL_TREE
;
3425 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3426 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3427 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3428 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3429 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3430 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3434 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3435 gimple new_stmt
= NULL
;
3436 stmt_vec_info prev_stmt_info
;
3439 tree vectype_out
, vectype_in
;
3441 tree lhs_type
, rhs_type
;
3442 enum { NARROW
, NONE
, WIDEN
} modifier
;
3443 vec
<tree
> vec_oprnds0
= vNULL
;
3444 vec
<tree
> vec_oprnds1
= vNULL
;
3446 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3447 int multi_step_cvt
= 0;
3448 vec
<tree
> vec_dsts
= vNULL
;
3449 vec
<tree
> interm_types
= vNULL
;
3450 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3452 machine_mode rhs_mode
;
3453 unsigned short fltsz
;
3455 /* Is STMT a vectorizable conversion? */
3457 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3460 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3463 if (!is_gimple_assign (stmt
))
3466 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3469 code
= gimple_assign_rhs_code (stmt
);
3470 if (!CONVERT_EXPR_CODE_P (code
)
3471 && code
!= FIX_TRUNC_EXPR
3472 && code
!= FLOAT_EXPR
3473 && code
!= WIDEN_MULT_EXPR
3474 && code
!= WIDEN_LSHIFT_EXPR
)
3477 op_type
= TREE_CODE_LENGTH (code
);
3479 /* Check types of lhs and rhs. */
3480 scalar_dest
= gimple_assign_lhs (stmt
);
3481 lhs_type
= TREE_TYPE (scalar_dest
);
3482 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3484 op0
= gimple_assign_rhs1 (stmt
);
3485 rhs_type
= TREE_TYPE (op0
);
3487 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3488 && !((INTEGRAL_TYPE_P (lhs_type
)
3489 && INTEGRAL_TYPE_P (rhs_type
))
3490 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3491 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3494 if ((INTEGRAL_TYPE_P (lhs_type
)
3495 && (TYPE_PRECISION (lhs_type
)
3496 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3497 || (INTEGRAL_TYPE_P (rhs_type
)
3498 && (TYPE_PRECISION (rhs_type
)
3499 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3501 if (dump_enabled_p ())
3502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3503 "type conversion to/from bit-precision unsupported."
3508 /* Check the operands of the operation. */
3509 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3510 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3512 if (dump_enabled_p ())
3513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3514 "use not simple.\n");
3517 if (op_type
== binary_op
)
3521 op1
= gimple_assign_rhs2 (stmt
);
3522 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3523 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3525 if (CONSTANT_CLASS_P (op0
))
3526 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3527 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3529 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3534 if (dump_enabled_p ())
3535 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3536 "use not simple.\n");
3541 /* If op0 is an external or constant defs use a vector type of
3542 the same size as the output vector type. */
3544 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3546 gcc_assert (vectype_in
);
3549 if (dump_enabled_p ())
3551 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3552 "no vectype for scalar type ");
3553 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3554 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3560 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3561 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3562 if (nunits_in
< nunits_out
)
3564 else if (nunits_out
== nunits_in
)
3569 /* Multiple types in SLP are handled by creating the appropriate number of
3570 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3572 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3574 else if (modifier
== NARROW
)
3575 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3577 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3579 /* Sanity check: make sure that at least one copy of the vectorized stmt
3580 needs to be generated. */
3581 gcc_assert (ncopies
>= 1);
3583 /* Supportable by target? */
3587 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3589 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3594 if (dump_enabled_p ())
3595 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3596 "conversion not supported by target.\n");
3600 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3601 &code1
, &code2
, &multi_step_cvt
,
3604 /* Binary widening operation can only be supported directly by the
3606 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3610 if (code
!= FLOAT_EXPR
3611 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3612 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3615 rhs_mode
= TYPE_MODE (rhs_type
);
3616 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3617 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3618 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3619 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3622 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3623 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3624 if (cvt_type
== NULL_TREE
)
3627 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3629 if (!supportable_convert_operation (code
, vectype_out
,
3630 cvt_type
, &decl1
, &codecvt1
))
3633 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3634 cvt_type
, &codecvt1
,
3635 &codecvt2
, &multi_step_cvt
,
3639 gcc_assert (multi_step_cvt
== 0);
3641 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3642 vectype_in
, &code1
, &code2
,
3643 &multi_step_cvt
, &interm_types
))
3647 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3650 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3651 codecvt2
= ERROR_MARK
;
3655 interm_types
.safe_push (cvt_type
);
3656 cvt_type
= NULL_TREE
;
3661 gcc_assert (op_type
== unary_op
);
3662 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3663 &code1
, &multi_step_cvt
,
3667 if (code
!= FIX_TRUNC_EXPR
3668 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3669 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3672 rhs_mode
= TYPE_MODE (rhs_type
);
3674 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3675 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3676 if (cvt_type
== NULL_TREE
)
3678 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3681 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3682 &code1
, &multi_step_cvt
,
3691 if (!vec_stmt
) /* transformation not required. */
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_NOTE
, vect_location
,
3695 "=== vectorizable_conversion ===\n");
3696 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3698 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3699 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3701 else if (modifier
== NARROW
)
3703 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3704 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3708 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3709 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3711 interm_types
.release ();
3716 if (dump_enabled_p ())
3717 dump_printf_loc (MSG_NOTE
, vect_location
,
3718 "transform conversion. ncopies = %d.\n", ncopies
);
3720 if (op_type
== binary_op
)
3722 if (CONSTANT_CLASS_P (op0
))
3723 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3724 else if (CONSTANT_CLASS_P (op1
))
3725 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3728 /* In case of multi-step conversion, we first generate conversion operations
3729 to the intermediate types, and then from that types to the final one.
3730 We create vector destinations for the intermediate type (TYPES) received
3731 from supportable_*_operation, and store them in the correct order
3732 for future use in vect_create_vectorized_*_stmts (). */
3733 vec_dsts
.create (multi_step_cvt
+ 1);
3734 vec_dest
= vect_create_destination_var (scalar_dest
,
3735 (cvt_type
&& modifier
== WIDEN
)
3736 ? cvt_type
: vectype_out
);
3737 vec_dsts
.quick_push (vec_dest
);
3741 for (i
= interm_types
.length () - 1;
3742 interm_types
.iterate (i
, &intermediate_type
); i
--)
3744 vec_dest
= vect_create_destination_var (scalar_dest
,
3746 vec_dsts
.quick_push (vec_dest
);
3751 vec_dest
= vect_create_destination_var (scalar_dest
,
3753 ? vectype_out
: cvt_type
);
3757 if (modifier
== WIDEN
)
3759 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3760 if (op_type
== binary_op
)
3761 vec_oprnds1
.create (1);
3763 else if (modifier
== NARROW
)
3764 vec_oprnds0
.create (
3765 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3767 else if (code
== WIDEN_LSHIFT_EXPR
)
3768 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3771 prev_stmt_info
= NULL
;
3775 for (j
= 0; j
< ncopies
; j
++)
3778 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3781 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3783 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3785 /* Arguments are ready, create the new vector stmt. */
3786 if (code1
== CALL_EXPR
)
3788 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3789 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3790 gimple_call_set_lhs (new_stmt
, new_temp
);
3794 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3795 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
3797 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3798 gimple_assign_set_lhs (new_stmt
, new_temp
);
3801 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3803 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3807 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3809 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3810 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3815 /* In case the vectorization factor (VF) is bigger than the number
3816 of elements that we can fit in a vectype (nunits), we have to
3817 generate more than one vector stmt - i.e - we need to "unroll"
3818 the vector stmt by a factor VF/nunits. */
3819 for (j
= 0; j
< ncopies
; j
++)
3826 if (code
== WIDEN_LSHIFT_EXPR
)
3831 /* Store vec_oprnd1 for every vector stmt to be created
3832 for SLP_NODE. We check during the analysis that all
3833 the shift arguments are the same. */
3834 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3835 vec_oprnds1
.quick_push (vec_oprnd1
);
3837 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3841 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3842 &vec_oprnds1
, slp_node
, -1);
3846 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3847 vec_oprnds0
.quick_push (vec_oprnd0
);
3848 if (op_type
== binary_op
)
3850 if (code
== WIDEN_LSHIFT_EXPR
)
3853 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3855 vec_oprnds1
.quick_push (vec_oprnd1
);
3861 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3862 vec_oprnds0
.truncate (0);
3863 vec_oprnds0
.quick_push (vec_oprnd0
);
3864 if (op_type
== binary_op
)
3866 if (code
== WIDEN_LSHIFT_EXPR
)
3869 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3871 vec_oprnds1
.truncate (0);
3872 vec_oprnds1
.quick_push (vec_oprnd1
);
3876 /* Arguments are ready. Create the new vector stmts. */
3877 for (i
= multi_step_cvt
; i
>= 0; i
--)
3879 tree this_dest
= vec_dsts
[i
];
3880 enum tree_code c1
= code1
, c2
= code2
;
3881 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3886 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3888 stmt
, this_dest
, gsi
,
3889 c1
, c2
, decl1
, decl2
,
3893 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3897 if (codecvt1
== CALL_EXPR
)
3899 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3900 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3901 gimple_call_set_lhs (new_stmt
, new_temp
);
3905 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3906 new_temp
= make_ssa_name (vec_dest
, NULL
);
3907 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
3912 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3915 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3918 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3921 if (!prev_stmt_info
)
3922 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3924 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3925 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3930 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3934 /* In case the vectorization factor (VF) is bigger than the number
3935 of elements that we can fit in a vectype (nunits), we have to
3936 generate more than one vector stmt - i.e - we need to "unroll"
3937 the vector stmt by a factor VF/nunits. */
3938 for (j
= 0; j
< ncopies
; j
++)
3942 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3946 vec_oprnds0
.truncate (0);
3947 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3948 vect_pow2 (multi_step_cvt
) - 1);
3951 /* Arguments are ready. Create the new vector stmts. */
3953 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3955 if (codecvt1
== CALL_EXPR
)
3957 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3958 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3959 gimple_call_set_lhs (new_stmt
, new_temp
);
3963 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3964 new_temp
= make_ssa_name (vec_dest
, NULL
);
3965 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
3969 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3970 vec_oprnds0
[i
] = new_temp
;
3973 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
3974 stmt
, vec_dsts
, gsi
,
3979 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3983 vec_oprnds0
.release ();
3984 vec_oprnds1
.release ();
3985 vec_dsts
.release ();
3986 interm_types
.release ();
/* NOTE(review): this region is a lossy line-sampled extraction of
   tree-vect-stmts.c -- many original lines (braces, `return' statements,
   parts of conditions) are missing and single statements are split across
   several lines.  The comments added below describe only what the visible
   fragments establish; anything else is hedged.  */
3992 /* Function vectorizable_assignment.
3994 Check if STMT performs an assignment (copy) that can be vectorized.
3995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3996 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4000 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4001 gimple
*vec_stmt
, slp_tree slp_node
)
/* Local analysis state pulled from the stmt's vec-info record.  */
4006 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4007 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4008 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* Def types of the (up to two) operands, filled by vect_is_simple_use_1
   below and later consumed by the cost model / def-copy fetchers.  */
4012 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4013 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4016 vec
<tree
> vec_oprnds
= vNULL
;
4018 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4019 gimple new_stmt
= NULL
;
4020 stmt_vec_info prev_stmt_info
= NULL
;
4021 enum tree_code code
;
4024 /* Multiple types in SLP are handled by creating the appropriate number of
4025 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4027 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
/* Loop case: one copy per nunits-sized slice of the vectorization factor.  */
4030 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4032 gcc_assert (ncopies
>= 1);
/* Early exits: the stmt must be relevant (or belong to a bb vinfo), be an
   internal def, and be a plain GIMPLE assignment with an SSA-name lhs.
   The `return false' lines are among those dropped by the extraction.  */
4034 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4037 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4040 /* Is vectorizable assignment? */
4041 if (!is_gimple_assign (stmt
))
4044 scalar_dest
= gimple_assign_lhs (stmt
)
;
4045 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Accept plain copies, PAREN_EXPR, and NOP/CONVERT-class rhs codes.  */
4048 code
= gimple_assign_rhs_code (stmt
);
4049 if (gimple_assign_single_p (stmt
)
4050 || code
== PAREN_EXPR
4051 || CONVERT_EXPR_CODE_P (code
))
4052 op
= gimple_assign_rhs1 (stmt
);
/* For VIEW_CONVERT_EXPR the operand of interest is underneath the view.  */
4056 if (code
== VIEW_CONVERT_EXPR
)
4057 op
= TREE_OPERAND (op
, 0);
4059 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4060 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4062 if (dump_enabled_p ())
4063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4064 "use not simple.\n");
4068 /* We can handle NOP_EXPR conversions that do not change the number
4069 of elements or the vector size. */
4070 if ((CONVERT_EXPR_CODE_P (code
)
4071 || code
== VIEW_CONVERT_EXPR
)
4073 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4074 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4075 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4078 /* We do not handle bit-precision changes. */
4079 if ((CONVERT_EXPR_CODE_P (code
)
4080 || code
== VIEW_CONVERT_EXPR
)
4081 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4082 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4083 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4084 || ((TYPE_PRECISION (TREE_TYPE (op
))
4085 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4086 /* But a conversion that does not change the bit-pattern is ok. */
4087 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4088 > TYPE_PRECISION (TREE_TYPE (op
)))
4089 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4091 if (dump_enabled_p ())
4092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4093 "type conversion to/from bit-precision "
/* Analysis phase (vec_stmt == NULL): record the stmt kind and model its
   cost; no code is generated yet.  */
4098 if (!vec_stmt
) /* transformation not required. */
4100 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4101 if (dump_enabled_p ())
4102 dump_printf_loc (MSG_NOTE
, vect_location
,
4103 "=== vectorizable_assignment ===\n");
4104 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transform phase: create the vector destination, then for each of the
   NCOPIES copies fetch the vector defs and emit one vector assignment
   per operand (wrapped in VIEW_CONVERT_EXPR for conversion codes).  */
4109 if (dump_enabled_p ())
4110 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4113 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4116 for (j
= 0; j
< ncopies
; j
++)
/* First iteration fetches initial defs; later iterations (the
   vect_get_vec_defs_for_stmt_copy path) chase the related-stmt chain.  */
4120 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4122 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4124 /* Arguments are ready. create the new vector stmt. */
4125 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4127 if (CONVERT_EXPR_CODE_P (code
)
4128 || code
== VIEW_CONVERT_EXPR
)
4129 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4130 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4131 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4132 gimple_assign_set_lhs (new_stmt
, new_temp
);
4133 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* SLP case: the generated stmt is recorded on the SLP node.  */
4135 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Loop case: chain the copies via STMT_VINFO_RELATED_STMT so later
   stages can find the defs of each unrolled copy.  */
4142 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4144 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4146 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the heap-allocated operand vector before returning.  */
4149 vec_oprnds
.release ();
/* NOTE(review): lossy extraction -- the `if (!vectype)' / `return'
   fragments between the visible statements were dropped.  Annotations
   below cover only what is visible.  */
4154 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4155 either as shift by a scalar or by a vector. */
4158 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4161 machine_mode vec_mode
;
/* Map the scalar type to its vector type, then probe the target.  */
4166 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Prefer the vector-shifted-by-scalar optab ...  */
4170 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4172 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* ... and fall back to the vector-shifted-by-vector optab.  */
4174 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4176 || (optab_handler (optab
, TYPE_MODE (vectype
))
4177 == CODE_FOR_nothing
))
/* Final check: the chosen optab must have a real insn for the mode.  */
4181 vec_mode
= TYPE_MODE (vectype
);
4182 icode
= (int) optab_handler (optab
, vec_mode
);
4183 if (icode
== CODE_FOR_nothing
)
/* NOTE(review): lossy extraction of this function -- dropped lines
   include `return false/true', braces, and several sub-conditions, and
   statements are split across lines.  Added comments describe only the
   visible structure.  */
4190 /* Function vectorizable_shift.
4192 Check if STMT performs a shift operation that can be vectorized.
4193 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4194 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4195 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4198 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4199 gimple
*vec_stmt
, slp_tree slp_node
)
4203 tree op0
, op1
= NULL
;
4204 tree vec_oprnd1
= NULL_TREE
;
4205 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4207 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4208 enum tree_code code
;
4209 machine_mode vec_mode
;
4213 machine_mode optab_op2_mode
;
/* dt[0] is the shifted operand's def type, dt[1] the shift amount's.  */
4216 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4217 gimple new_stmt
= NULL
;
4218 stmt_vec_info prev_stmt_info
;
4225 vec
<tree
> vec_oprnds0
= vNULL
;
4226 vec
<tree
> vec_oprnds1
= vNULL
;
/* Assume scalar shift amount until proven otherwise below.  */
4229 bool scalar_shift_arg
= true;
4230 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Early exits: relevance, internal def, plain assignment with SSA lhs,
   and a shift/rotate rhs code.  */
4233 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4236 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4239 /* Is STMT a vectorizable binary/unary operation? */
4240 if (!is_gimple_assign (stmt
))
4243 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4246 code
= gimple_assign_rhs_code (stmt
);
4248 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4249 || code
== RROTATE_EXPR
))
4252 scalar_dest
= gimple_assign_lhs (stmt
);
4253 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
/* Reject destinations whose precision differs from their mode's
   (bit-field-like precision); shifts on those are not handled.  */
4254 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4255 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4257 if (dump_enabled_p ())
4258 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4259 "bit-precision shifts not supported.\n");
/* Analyze operand 0 (the shifted value); this also yields VECTYPE.  */
4263 op0
= gimple_assign_rhs1 (stmt
);
4264 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4265 &def_stmt
, &def
, &dt
[0], &vectype
))
4267 if (dump_enabled_p ())
4268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4269 "use not simple.\n");
4272 /* If op0 is an external or constant def use a vector type with
4273 the same size as the output vector type. */
4275 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4277 gcc_assert (vectype
);
4280 if (dump_enabled_p ())
4281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4282 "no vectype for scalar type\n");
/* Input and output vectors must hold the same number of elements.  */
4286 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4287 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4288 if (nunits_out
!= nunits_in
)
/* Analyze operand 1 (the shift amount).  */
4291 op1
= gimple_assign_rhs2 (stmt
);
4292 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4293 &def
, &dt
[1], &op1_vectype
))
4295 if (dump_enabled_p ())
4296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4297 "use not simple.\n");
4302 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4306 /* Multiple types in SLP are handled by creating the appropriate number of
4307 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4309 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4312 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4314 gcc_assert (ncopies
>= 1);
4316 /* Determine whether the shift amount is a vector, or scalar. If the
4317 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4319 if (dt
[1] == vect_internal_def
&& !slp_node
)
4320 scalar_shift_arg
= false;
4321 else if (dt
[1] == vect_constant_def
4322 || dt
[1] == vect_external_def
4323 || dt
[1] == vect_internal_def
)
4325 /* In SLP, need to check whether the shift count is the same,
4326 in loops if it is a constant or invariant, it is always
4330 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4333 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4334 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4335 scalar_shift_arg
= false;
4340 if (dump_enabled_p ())
4341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4342 "operand mode requires invariant argument.\n");
4346 /* Vector shifted by vector. */
4347 if (!scalar_shift_arg
)
4349 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4350 if (dump_enabled_p ())
4351 dump_printf_loc (MSG_NOTE
, vect_location
,
4352 "vector/vector shift/rotate found.\n");
/* The shift amount's vector type must have the same mode as VECTYPE.  */
4355 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4356 if (op1_vectype
== NULL_TREE
4357 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4359 if (dump_enabled_p ())
4360 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4361 "unusable type for last operand in"
4362 " vector/vector shift/rotate.\n");
4366 /* See if the machine has a vector shifted by scalar insn and if not
4367 then see if it has a vector shifted by vector insn. */
4370 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4372 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4374 if (dump_enabled_p ())
4375 dump_printf_loc (MSG_NOTE
, vect_location
,
4376 "vector/scalar shift/rotate found.\n");
/* Fallback: use the vector/vector form even for a scalar amount.  */
4380 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4382 && (optab_handler (optab
, TYPE_MODE (vectype
))
4383 != CODE_FOR_nothing
))
4385 scalar_shift_arg
= false;
4387 if (dump_enabled_p ())
4388 dump_printf_loc (MSG_NOTE
, vect_location
,
4389 "vector/vector shift/rotate found.\n");
4391 /* Unlike the other binary operators, shifts/rotates have
4392 the rhs being int, instead of the same type as the lhs,
4393 so make sure the scalar is the right type if we are
4394 dealing with vectors of long long/long/short/char. */
4395 if (dt
[1] == vect_constant_def
)
4396 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4397 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4401 && TYPE_MODE (TREE_TYPE (vectype
))
4402 != TYPE_MODE (TREE_TYPE (op1
)))
4404 if (dump_enabled_p ())
4405 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4406 "unusable type for last operand in"
4407 " vector/vector shift/rotate.\n");
/* Only during the transform phase (vec_stmt set), and only in the
   non-SLP case, materialize OP1 as an invariant vector.  */
4410 if (vec_stmt
&& !slp_node
)
4412 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4413 op1
= vect_init_vector (stmt
, op1
,
4414 TREE_TYPE (vectype
), NULL
);
4421 /* Supportable by target? */
4424 if (dump_enabled_p ())
4425 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4429 vec_mode
= TYPE_MODE (vectype
);
4430 icode
= (int) optab_handler (optab
, vec_mode
);
4431 if (icode
== CODE_FOR_nothing
)
4433 if (dump_enabled_p ())
4434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4435 "op not supported by target.\n");
4436 /* Check only during analysis. */
4437 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4438 || (vf
< vect_min_worthwhile_factor (code
)
4441 if (dump_enabled_p ())
4442 dump_printf_loc (MSG_NOTE
, vect_location
,
4443 "proceeding using word mode.\n");
4446 /* Worthwhile without SIMD support? Check only during analysis. */
4447 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4448 && vf
< vect_min_worthwhile_factor (code
)
4451 if (dump_enabled_p ())
4452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4453 "not worthwhile without SIMD support.\n");
/* Analysis phase: record the stmt kind and model its cost only.  */
4457 if (!vec_stmt
) /* transformation not required. */
4459 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4460 if (dump_enabled_p ())
4461 dump_printf_loc (MSG_NOTE
, vect_location
,
4462 "=== vectorizable_shift ===\n");
4463 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transform phase: generate NCOPIES vector shift stmts.  */
4469 if (dump_enabled_p ())
4470 dump_printf_loc (MSG_NOTE
, vect_location
,
4471 "transform binary/unary operation.\n");
4474 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4476 prev_stmt_info
= NULL
;
4477 for (j
= 0; j
< ncopies
; j
++)
4482 if (scalar_shift_arg
)
4484 /* Vector shl and shr insn patterns can be defined with scalar
4485 operand 2 (shift operand). In this case, use constant or loop
4486 invariant op1 directly, without extending it to vector mode
4488 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4489 if (!VECTOR_MODE_P (optab_op2_mode
))
4491 if (dump_enabled_p ())
4492 dump_printf_loc (MSG_NOTE
, vect_location
,
4493 "operand 1 using scalar mode.\n");
4495 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4496 vec_oprnds1
.quick_push (vec_oprnd1
);
4499 /* Store vec_oprnd1 for every vector stmt to be created
4500 for SLP_NODE. We check during the analysis that all
4501 the shift arguments are the same.
4502 TODO: Allow different constants for different vector
4503 stmts generated for an SLP instance. */
4504 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4505 vec_oprnds1
.quick_push (vec_oprnd1
);
4510 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4511 (a special case for certain kind of vector shifts); otherwise,
4512 operand 1 should be of a vector type (the usual case). */
4514 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4517 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4521 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4523 /* Arguments are ready. Create the new vector stmt. */
4524 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4526 vop1
= vec_oprnds1
[i
];
4527 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4528 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4529 gimple_assign_set_lhs (new_stmt
, new_temp
);
4530 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* SLP: record on the node; loop: chain via STMT_VINFO_RELATED_STMT.  */
4532 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4539 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4541 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4542 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the heap-allocated operand vectors before returning.  */
4545 vec_oprnds0
.release ();
4546 vec_oprnds1
.release ();
4552 /* Function vectorizable_operation.
4554 Check if STMT performs a binary, unary or ternary operation that can
4556 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4557 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4558 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4561 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4562 gimple
*vec_stmt
, slp_tree slp_node
)
4566 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4567 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4569 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4570 enum tree_code code
;
4571 machine_mode vec_mode
;
4578 enum vect_def_type dt
[3]
4579 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4580 gimple new_stmt
= NULL
;
4581 stmt_vec_info prev_stmt_info
;
4587 vec
<tree
> vec_oprnds0
= vNULL
;
4588 vec
<tree
> vec_oprnds1
= vNULL
;
4589 vec
<tree
> vec_oprnds2
= vNULL
;
4590 tree vop0
, vop1
, vop2
;
4591 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4594 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4597 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4600 /* Is STMT a vectorizable binary/unary operation? */
4601 if (!is_gimple_assign (stmt
))
4604 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4607 code
= gimple_assign_rhs_code (stmt
);
4609 /* For pointer addition, we should use the normal plus for
4610 the vector addition. */
4611 if (code
== POINTER_PLUS_EXPR
)
4614 /* Support only unary or binary operations. */
4615 op_type
= TREE_CODE_LENGTH (code
);
4616 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4618 if (dump_enabled_p ())
4619 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4620 "num. args = %d (not unary/binary/ternary op).\n",
4625 scalar_dest
= gimple_assign_lhs (stmt
);
4626 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4628 /* Most operations cannot handle bit-precision types without extra
4630 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4631 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4632 /* Exception are bitwise binary operations. */
4633 && code
!= BIT_IOR_EXPR
4634 && code
!= BIT_XOR_EXPR
4635 && code
!= BIT_AND_EXPR
)
4637 if (dump_enabled_p ())
4638 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4639 "bit-precision arithmetic not supported.\n");
4643 op0
= gimple_assign_rhs1 (stmt
);
4644 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4645 &def_stmt
, &def
, &dt
[0], &vectype
))
4647 if (dump_enabled_p ())
4648 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4649 "use not simple.\n");
4652 /* If op0 is an external or constant def use a vector type with
4653 the same size as the output vector type. */
4655 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4657 gcc_assert (vectype
);
4660 if (dump_enabled_p ())
4662 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4663 "no vectype for scalar type ");
4664 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4666 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4672 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4673 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4674 if (nunits_out
!= nunits_in
)
4677 if (op_type
== binary_op
|| op_type
== ternary_op
)
4679 op1
= gimple_assign_rhs2 (stmt
);
4680 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4685 "use not simple.\n");
4689 if (op_type
== ternary_op
)
4691 op2
= gimple_assign_rhs3 (stmt
);
4692 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4697 "use not simple.\n");
4703 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4707 /* Multiple types in SLP are handled by creating the appropriate number of
4708 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4710 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4713 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4715 gcc_assert (ncopies
>= 1);
4717 /* Shifts are handled in vectorizable_shift (). */
4718 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4719 || code
== RROTATE_EXPR
)
4722 /* Supportable by target? */
4724 vec_mode
= TYPE_MODE (vectype
);
4725 if (code
== MULT_HIGHPART_EXPR
)
4727 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4728 icode
= LAST_INSN_CODE
;
4730 icode
= CODE_FOR_nothing
;
4734 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4737 if (dump_enabled_p ())
4738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4742 icode
= (int) optab_handler (optab
, vec_mode
);
4745 if (icode
== CODE_FOR_nothing
)
4747 if (dump_enabled_p ())
4748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4749 "op not supported by target.\n");
4750 /* Check only during analysis. */
4751 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4752 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_NOTE
, vect_location
,
4756 "proceeding using word mode.\n");
4759 /* Worthwhile without SIMD support? Check only during analysis. */
4760 if (!VECTOR_MODE_P (vec_mode
)
4762 && vf
< vect_min_worthwhile_factor (code
))
4764 if (dump_enabled_p ())
4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4766 "not worthwhile without SIMD support.\n");
4770 if (!vec_stmt
) /* transformation not required. */
4772 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4773 if (dump_enabled_p ())
4774 dump_printf_loc (MSG_NOTE
, vect_location
,
4775 "=== vectorizable_operation ===\n");
4776 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_NOTE
, vect_location
,
4784 "transform binary/unary operation.\n");
4787 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4789 /* In case the vectorization factor (VF) is bigger than the number
4790 of elements that we can fit in a vectype (nunits), we have to generate
4791 more than one vector stmt - i.e - we need to "unroll" the
4792 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4793 from one copy of the vector stmt to the next, in the field
4794 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4795 stages to find the correct vector defs to be used when vectorizing
4796 stmts that use the defs of the current stmt. The example below
4797 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4798 we need to create 4 vectorized stmts):
4800 before vectorization:
4801 RELATED_STMT VEC_STMT
4805 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4807 RELATED_STMT VEC_STMT
4808 VS1_0: vx0 = memref0 VS1_1 -
4809 VS1_1: vx1 = memref1 VS1_2 -
4810 VS1_2: vx2 = memref2 VS1_3 -
4811 VS1_3: vx3 = memref3 - -
4812 S1: x = load - VS1_0
4815 step2: vectorize stmt S2 (done here):
4816 To vectorize stmt S2 we first need to find the relevant vector
4817 def for the first operand 'x'. This is, as usual, obtained from
4818 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4819 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4820 relevant vector def 'vx0'. Having found 'vx0' we can generate
4821 the vector stmt VS2_0, and as usual, record it in the
4822 STMT_VINFO_VEC_STMT of stmt S2.
4823 When creating the second copy (VS2_1), we obtain the relevant vector
4824 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4825 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4826 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4827 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4828 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4829 chain of stmts and pointers:
4830 RELATED_STMT VEC_STMT
4831 VS1_0: vx0 = memref0 VS1_1 -
4832 VS1_1: vx1 = memref1 VS1_2 -
4833 VS1_2: vx2 = memref2 VS1_3 -
4834 VS1_3: vx3 = memref3 - -
4835 S1: x = load - VS1_0
4836 VS2_0: vz0 = vx0 + v1 VS2_1 -
4837 VS2_1: vz1 = vx1 + v1 VS2_2 -
4838 VS2_2: vz2 = vx2 + v1 VS2_3 -
4839 VS2_3: vz3 = vx3 + v1 - -
4840 S2: z = x + 1 - VS2_0 */
4842 prev_stmt_info
= NULL
;
4843 for (j
= 0; j
< ncopies
; j
++)
4848 if (op_type
== binary_op
|| op_type
== ternary_op
)
4849 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4852 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4854 if (op_type
== ternary_op
)
4856 vec_oprnds2
.create (1);
4857 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4864 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4865 if (op_type
== ternary_op
)
4867 tree vec_oprnd
= vec_oprnds2
.pop ();
4868 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4873 /* Arguments are ready. Create the new vector stmt. */
4874 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4876 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4877 ? vec_oprnds1
[i
] : NULL_TREE
);
4878 vop2
= ((op_type
== ternary_op
)
4879 ? vec_oprnds2
[i
] : NULL_TREE
);
4880 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
4882 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4883 gimple_assign_set_lhs (new_stmt
, new_temp
);
4884 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4886 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4893 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4895 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4896 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4899 vec_oprnds0
.release ();
4900 vec_oprnds1
.release ();
4901 vec_oprnds2
.release ();
4906 /* A helper function to ensure data reference DR's base alignment
4910 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4915 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4917 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4918 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4920 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4921 DECL_USER_ALIGN (base_decl
) = 1;
4922 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
4927 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4928 reversal of the vector elements. If that is impossible to do,
4932 perm_mask_for_reverse (tree vectype
)
4937 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4938 sel
= XALLOCAVEC (unsigned char, nunits
);
4940 for (i
= 0; i
< nunits
; ++i
)
4941 sel
[i
] = nunits
- 1 - i
;
4943 return vect_gen_perm_mask (vectype
, sel
);
4946 /* Function vectorizable_store.
4948 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4950 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4951 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4952 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4955 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4961 tree vec_oprnd
= NULL_TREE
;
4962 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4963 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4964 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4966 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4967 struct loop
*loop
= NULL
;
4968 machine_mode vec_mode
;
4970 enum dr_alignment_support alignment_support_scheme
;
4973 enum vect_def_type dt
;
4974 stmt_vec_info prev_stmt_info
= NULL
;
4975 tree dataref_ptr
= NULL_TREE
;
4976 tree dataref_offset
= NULL_TREE
;
4977 gimple ptr_incr
= NULL
;
4978 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4981 gimple next_stmt
, first_stmt
= NULL
;
4982 bool grouped_store
= false;
4983 bool store_lanes_p
= false;
4984 unsigned int group_size
, i
;
4985 vec
<tree
> dr_chain
= vNULL
;
4986 vec
<tree
> oprnds
= vNULL
;
4987 vec
<tree
> result_chain
= vNULL
;
4989 bool negative
= false;
4990 tree offset
= NULL_TREE
;
4991 vec
<tree
> vec_oprnds
= vNULL
;
4992 bool slp
= (slp_node
!= NULL
);
4993 unsigned int vec_num
;
4994 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4998 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5000 /* Multiple types in SLP are handled by creating the appropriate number of
5001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5003 if (slp
|| PURE_SLP_STMT (stmt_info
))
5006 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5008 gcc_assert (ncopies
>= 1);
5010 /* FORNOW. This restriction should be relaxed. */
5011 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5013 if (dump_enabled_p ())
5014 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5015 "multiple types in nested loop.\n");
5019 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5022 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5025 /* Is vectorizable store? */
5027 if (!is_gimple_assign (stmt
))
5030 scalar_dest
= gimple_assign_lhs (stmt
);
5031 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5032 && is_pattern_stmt_p (stmt_info
))
5033 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5034 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5035 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5036 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5037 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5038 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5039 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5040 && TREE_CODE (scalar_dest
) != MEM_REF
)
5043 gcc_assert (gimple_assign_single_p (stmt
));
5044 op
= gimple_assign_rhs1 (stmt
);
5045 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5048 if (dump_enabled_p ())
5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5050 "use not simple.\n");
5054 elem_type
= TREE_TYPE (vectype
);
5055 vec_mode
= TYPE_MODE (vectype
);
5057 /* FORNOW. In some cases can vectorize even if data-type not supported
5058 (e.g. - array initialization with 0). */
5059 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5062 if (!STMT_VINFO_DATA_REF (stmt_info
))
5066 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5067 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5068 size_zero_node
) < 0;
5069 if (negative
&& ncopies
> 1)
5071 if (dump_enabled_p ())
5072 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5073 "multiple types with negative step.\n");
5079 gcc_assert (!grouped_store
);
5080 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5081 if (alignment_support_scheme
!= dr_aligned
5082 && alignment_support_scheme
!= dr_unaligned_supported
)
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5086 "negative step but alignment required.\n");
5089 if (dt
!= vect_constant_def
5090 && dt
!= vect_external_def
5091 && !perm_mask_for_reverse (vectype
))
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5095 "negative step and reversing not supported.\n");
5100 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5102 grouped_store
= true;
5103 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5104 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5106 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5107 if (vect_store_lanes_supported (vectype
, group_size
))
5108 store_lanes_p
= true;
5109 else if (!vect_grouped_store_supported (vectype
, group_size
))
5113 if (first_stmt
== stmt
)
5115 /* STMT is the leader of the group. Check the operands of all the
5116 stmts of the group. */
5117 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5120 gcc_assert (gimple_assign_single_p (next_stmt
));
5121 op
= gimple_assign_rhs1 (next_stmt
);
5122 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5123 &def_stmt
, &def
, &dt
))
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5127 "use not simple.\n");
5130 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5135 if (!vec_stmt
) /* transformation not required. */
5137 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5138 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5145 ensure_base_align (stmt_info
, dr
);
5149 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5150 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5152 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5155 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5157 /* We vectorize all the stmts of the interleaving group when we
5158 reach the last stmt in the group. */
5159 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5160 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5169 grouped_store
= false;
5170 /* VEC_NUM is the number of vect stmts to be created for this
5172 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5173 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5174 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5175 op
= gimple_assign_rhs1 (first_stmt
);
5178 /* VEC_NUM is the number of vect stmts to be created for this
5180 vec_num
= group_size
;
5186 group_size
= vec_num
= 1;
5189 if (dump_enabled_p ())
5190 dump_printf_loc (MSG_NOTE
, vect_location
,
5191 "transform store. ncopies = %d\n", ncopies
);
5193 dr_chain
.create (group_size
);
5194 oprnds
.create (group_size
);
5196 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5197 gcc_assert (alignment_support_scheme
);
5198 /* Targets with store-lane instructions must not require explicit
5200 gcc_assert (!store_lanes_p
5201 || alignment_support_scheme
== dr_aligned
5202 || alignment_support_scheme
== dr_unaligned_supported
);
5205 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5208 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5210 aggr_type
= vectype
;
5212 /* In case the vectorization factor (VF) is bigger than the number
5213 of elements that we can fit in a vectype (nunits), we have to generate
5214 more than one vector stmt - i.e - we need to "unroll" the
5215 vector stmt by a factor VF/nunits. For more details see documentation in
5216 vect_get_vec_def_for_copy_stmt. */
5218 /* In case of interleaving (non-unit grouped access):
5225 We create vectorized stores starting from base address (the access of the
5226 first stmt in the chain (S2 in the above example), when the last store stmt
5227 of the chain (S4) is reached:
5230 VS2: &base + vec_size*1 = vx0
5231 VS3: &base + vec_size*2 = vx1
5232 VS4: &base + vec_size*3 = vx3
5234 Then permutation statements are generated:
5236 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5237 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5240 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5241 (the order of the data-refs in the output of vect_permute_store_chain
5242 corresponds to the order of scalar stmts in the interleaving chain - see
5243 the documentation of vect_permute_store_chain()).
5245 In case of both multiple types and interleaving, above vector stores and
5246 permutation stmts are created for every copy. The result vector stmts are
5247 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5248 STMT_VINFO_RELATED_STMT for the next copies.
5251 prev_stmt_info
= NULL
;
5252 for (j
= 0; j
< ncopies
; j
++)
5260 /* Get vectorized arguments for SLP_NODE. */
5261 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5262 NULL
, slp_node
, -1);
5264 vec_oprnd
= vec_oprnds
[0];
5268 /* For interleaved stores we collect vectorized defs for all the
5269 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5270 used as an input to vect_permute_store_chain(), and OPRNDS as
5271 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5273 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5274 OPRNDS are of size 1. */
5275 next_stmt
= first_stmt
;
5276 for (i
= 0; i
< group_size
; i
++)
5278 /* Since gaps are not supported for interleaved stores,
5279 GROUP_SIZE is the exact number of stmts in the chain.
5280 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5281 there is no interleaving, GROUP_SIZE is 1, and only one
5282 iteration of the loop will be executed. */
5283 gcc_assert (next_stmt
5284 && gimple_assign_single_p (next_stmt
));
5285 op
= gimple_assign_rhs1 (next_stmt
);
5287 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5289 dr_chain
.quick_push (vec_oprnd
);
5290 oprnds
.quick_push (vec_oprnd
);
5291 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5295 /* We should have catched mismatched types earlier. */
5296 gcc_assert (useless_type_conversion_p (vectype
,
5297 TREE_TYPE (vec_oprnd
)));
5298 bool simd_lane_access_p
5299 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5300 if (simd_lane_access_p
5301 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5302 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5303 && integer_zerop (DR_OFFSET (first_dr
))
5304 && integer_zerop (DR_INIT (first_dr
))
5305 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5306 get_alias_set (DR_REF (first_dr
))))
5308 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5309 dataref_offset
= build_int_cst (reference_alias_ptr_type
5310 (DR_REF (first_dr
)), 0);
5315 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5316 simd_lane_access_p
? loop
: NULL
,
5317 offset
, &dummy
, gsi
, &ptr_incr
,
5318 simd_lane_access_p
, &inv_p
);
5319 gcc_assert (bb_vinfo
|| !inv_p
);
5323 /* For interleaved stores we created vectorized defs for all the
5324 defs stored in OPRNDS in the previous iteration (previous copy).
5325 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5326 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5328 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5329 OPRNDS are of size 1. */
5330 for (i
= 0; i
< group_size
; i
++)
5333 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5335 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5336 dr_chain
[i
] = vec_oprnd
;
5337 oprnds
[i
] = vec_oprnd
;
5341 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5342 TYPE_SIZE_UNIT (aggr_type
));
5344 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5345 TYPE_SIZE_UNIT (aggr_type
));
5352 /* Combine all the vectors into an array. */
5353 vec_array
= create_vector_array (vectype
, vec_num
);
5354 for (i
= 0; i
< vec_num
; i
++)
5356 vec_oprnd
= dr_chain
[i
];
5357 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5361 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5362 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5363 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5364 gimple_call_set_lhs (new_stmt
, data_ref
);
5365 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5373 result_chain
.create (group_size
);
5375 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5379 next_stmt
= first_stmt
;
5380 for (i
= 0; i
< vec_num
; i
++)
5382 unsigned align
, misalign
;
5385 /* Bump the vector pointer. */
5386 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5390 vec_oprnd
= vec_oprnds
[i
];
5391 else if (grouped_store
)
5392 /* For grouped stores vectorized defs are interleaved in
5393 vect_permute_store_chain(). */
5394 vec_oprnd
= result_chain
[i
];
5396 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5399 : build_int_cst (reference_alias_ptr_type
5400 (DR_REF (first_dr
)), 0));
5401 align
= TYPE_ALIGN_UNIT (vectype
);
5402 if (aligned_access_p (first_dr
))
5404 else if (DR_MISALIGNMENT (first_dr
) == -1)
5406 TREE_TYPE (data_ref
)
5407 = build_aligned_type (TREE_TYPE (data_ref
),
5408 TYPE_ALIGN (elem_type
));
5409 align
= TYPE_ALIGN_UNIT (elem_type
);
5414 TREE_TYPE (data_ref
)
5415 = build_aligned_type (TREE_TYPE (data_ref
),
5416 TYPE_ALIGN (elem_type
));
5417 misalign
= DR_MISALIGNMENT (first_dr
);
5419 if (dataref_offset
== NULL_TREE
)
5420 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5424 && dt
!= vect_constant_def
5425 && dt
!= vect_external_def
)
5427 tree perm_mask
= perm_mask_for_reverse (vectype
);
5429 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5431 tree new_temp
= make_ssa_name (perm_dest
, NULL
);
5433 /* Generate the permute statement. */
5435 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, new_temp
,
5436 vec_oprnd
, vec_oprnd
,
5438 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5440 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5441 vec_oprnd
= new_temp
;
5444 /* Arguments are ready. Create the new vector stmt. */
5445 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5446 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5451 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5459 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5461 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5462 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5466 dr_chain
.release ();
5468 result_chain
.release ();
5469 vec_oprnds
.release ();
5474 /* Given a vector type VECTYPE and permutation SEL returns
5475 the VECTOR_CST mask that implements the permutation of the
5476 vector elements. If that is impossible to do, returns NULL. */
5479 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
5481 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5484 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5486 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5489 mask_elt_type
= lang_hooks
.types
.type_for_mode
5490 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5491 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5493 mask_elts
= XALLOCAVEC (tree
, nunits
);
5494 for (i
= nunits
- 1; i
>= 0; i
--)
5495 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5496 mask_vec
= build_vector (mask_type
, mask_elts
);
5501 /* Given a vector variable X and Y, that was generated for the scalar
5502 STMT, generate instructions to permute the vector elements of X and Y
5503 using permutation mask MASK_VEC, insert them at *GSI and return the
5504 permuted vector variable. */
5507 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5508 gimple_stmt_iterator
*gsi
)
5510 tree vectype
= TREE_TYPE (x
);
5511 tree perm_dest
, data_ref
;
5514 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5515 data_ref
= make_ssa_name (perm_dest
, NULL
);
5517 /* Generate the permute statement. */
5518 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5520 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5525 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5526 inserting them on the loops preheader edge. Returns true if we
5527 were successful in doing so (and thus STMT can be moved then),
5528 otherwise returns false. */
5531 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5537 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5539 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5540 if (!gimple_nop_p (def_stmt
)
5541 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5543 /* Make sure we don't need to recurse. While we could do
5544 so in simple cases when there are more complex use webs
5545 we don't have an easy way to preserve stmt order to fulfil
5546 dependencies within them. */
5549 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5551 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5553 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5554 if (!gimple_nop_p (def_stmt2
)
5555 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5565 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5567 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5568 if (!gimple_nop_p (def_stmt
)
5569 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5571 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5572 gsi_remove (&gsi
, false);
5573 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5580 /* vectorizable_load.
5582 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5584 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5585 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5586 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5589 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5590 slp_tree slp_node
, slp_instance slp_node_instance
)
5593 tree vec_dest
= NULL
;
5594 tree data_ref
= NULL
;
5595 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5596 stmt_vec_info prev_stmt_info
;
5597 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5598 struct loop
*loop
= NULL
;
5599 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5600 bool nested_in_vect_loop
= false;
5601 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5602 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5606 gimple new_stmt
= NULL
;
5608 enum dr_alignment_support alignment_support_scheme
;
5609 tree dataref_ptr
= NULL_TREE
;
5610 tree dataref_offset
= NULL_TREE
;
5611 gimple ptr_incr
= NULL
;
5612 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5614 int i
, j
, group_size
, group_gap
;
5615 tree msq
= NULL_TREE
, lsq
;
5616 tree offset
= NULL_TREE
;
5617 tree byte_offset
= NULL_TREE
;
5618 tree realignment_token
= NULL_TREE
;
5620 vec
<tree
> dr_chain
= vNULL
;
5621 bool grouped_load
= false;
5622 bool load_lanes_p
= false;
5625 bool negative
= false;
5626 bool compute_in_loop
= false;
5627 struct loop
*at_loop
;
5629 bool slp
= (slp_node
!= NULL
);
5630 bool slp_perm
= false;
5631 enum tree_code code
;
5632 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5635 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5636 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5637 int gather_scale
= 1;
5638 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5642 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5643 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5644 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5649 /* Multiple types in SLP are handled by creating the appropriate number of
5650 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5652 if (slp
|| PURE_SLP_STMT (stmt_info
))
5655 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5657 gcc_assert (ncopies
>= 1);
5659 /* FORNOW. This restriction should be relaxed. */
5660 if (nested_in_vect_loop
&& ncopies
> 1)
5662 if (dump_enabled_p ())
5663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5664 "multiple types in nested loop.\n");
5668 /* Invalidate assumptions made by dependence analysis when vectorization
5669 on the unrolled body effectively re-orders stmts. */
5671 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5672 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5673 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5675 if (dump_enabled_p ())
5676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5677 "cannot perform implicit CSE when unrolling "
5678 "with negative dependence distance\n");
5682 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5685 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5688 /* Is vectorizable load? */
5689 if (!is_gimple_assign (stmt
))
5692 scalar_dest
= gimple_assign_lhs (stmt
);
5693 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5696 code
= gimple_assign_rhs_code (stmt
);
5697 if (code
!= ARRAY_REF
5698 && code
!= BIT_FIELD_REF
5699 && code
!= INDIRECT_REF
5700 && code
!= COMPONENT_REF
5701 && code
!= IMAGPART_EXPR
5702 && code
!= REALPART_EXPR
5704 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5707 if (!STMT_VINFO_DATA_REF (stmt_info
))
5710 elem_type
= TREE_TYPE (vectype
);
5711 mode
= TYPE_MODE (vectype
);
5713 /* FORNOW. In some cases can vectorize even if data-type not supported
5714 (e.g. - data copies). */
5715 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5717 if (dump_enabled_p ())
5718 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5719 "Aligned load, but unsupported type.\n");
5723 /* Check if the load is a part of an interleaving chain. */
5724 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5726 grouped_load
= true;
5728 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5730 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5731 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5733 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5734 if (vect_load_lanes_supported (vectype
, group_size
))
5735 load_lanes_p
= true;
5736 else if (!vect_grouped_load_supported (vectype
, group_size
))
5740 /* Invalidate assumptions made by dependence analysis when vectorization
5741 on the unrolled body effectively re-orders stmts. */
5742 if (!PURE_SLP_STMT (stmt_info
)
5743 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5744 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5745 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5749 "cannot perform implicit CSE when performing "
5750 "group loads with negative dependence distance\n");
5756 if (STMT_VINFO_GATHER_P (stmt_info
))
5760 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5761 &gather_off
, &gather_scale
);
5762 gcc_assert (gather_decl
);
5763 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5764 &def_stmt
, &def
, &gather_dt
,
5765 &gather_off_vectype
))
5767 if (dump_enabled_p ())
5768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5769 "gather index use not simple.\n");
5773 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5777 negative
= tree_int_cst_compare (nested_in_vect_loop
5778 ? STMT_VINFO_DR_STEP (stmt_info
)
5780 size_zero_node
) < 0;
5781 if (negative
&& ncopies
> 1)
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5785 "multiple types with negative step.\n");
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5795 "negative step for group load not supported"
5799 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5800 if (alignment_support_scheme
!= dr_aligned
5801 && alignment_support_scheme
!= dr_unaligned_supported
)
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5805 "negative step but alignment required.\n");
5808 if (!perm_mask_for_reverse (vectype
))
5810 if (dump_enabled_p ())
5811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5812 "negative step and reversing not supported."
5819 if (!vec_stmt
) /* transformation not required. */
5821 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
5822 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_NOTE
, vect_location
,
5828 "transform load. ncopies = %d\n", ncopies
);
5832 ensure_base_align (stmt_info
, dr
);
5834 if (STMT_VINFO_GATHER_P (stmt_info
))
5836 tree vec_oprnd0
= NULL_TREE
, op
;
5837 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
5838 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5839 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
5840 edge pe
= loop_preheader_edge (loop
);
5843 enum { NARROW
, NONE
, WIDEN
} modifier
;
5844 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
5846 if (nunits
== gather_off_nunits
)
5848 else if (nunits
== gather_off_nunits
/ 2)
5850 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
5853 for (i
= 0; i
< gather_off_nunits
; ++i
)
5854 sel
[i
] = i
| nunits
;
5856 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
5857 gcc_assert (perm_mask
!= NULL_TREE
);
5859 else if (nunits
== gather_off_nunits
* 2)
5861 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5864 for (i
= 0; i
< nunits
; ++i
)
5865 sel
[i
] = i
< gather_off_nunits
5866 ? i
: i
+ nunits
- gather_off_nunits
;
5868 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
5869 gcc_assert (perm_mask
!= NULL_TREE
);
5875 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
5876 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5877 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5878 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5879 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5880 scaletype
= TREE_VALUE (arglist
);
5881 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
5883 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5885 ptr
= fold_convert (ptrtype
, gather_base
);
5886 if (!is_gimple_min_invariant (ptr
))
5888 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5889 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5890 gcc_assert (!new_bb
);
5893 /* Currently we support only unconditional gather loads,
5894 so mask should be all ones. */
5895 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
5896 mask
= build_int_cst (masktype
, -1);
5897 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
5899 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
5900 mask
= build_vector_from_val (masktype
, mask
);
5901 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5903 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
5907 for (j
= 0; j
< 6; ++j
)
5909 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
5910 mask
= build_real (TREE_TYPE (masktype
), r
);
5911 mask
= build_vector_from_val (masktype
, mask
);
5912 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5917 scale
= build_int_cst (scaletype
, gather_scale
);
5919 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
5920 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
5921 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
5925 for (j
= 0; j
< 6; ++j
)
5927 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
5928 merge
= build_real (TREE_TYPE (rettype
), r
);
5932 merge
= build_vector_from_val (rettype
, merge
);
5933 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
5935 prev_stmt_info
= NULL
;
5936 for (j
= 0; j
< ncopies
; ++j
)
5938 if (modifier
== WIDEN
&& (j
& 1))
5939 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
5940 perm_mask
, stmt
, gsi
);
5943 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
5946 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
5948 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5950 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5951 == TYPE_VECTOR_SUBPARTS (idxtype
));
5952 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5953 var
= make_ssa_name (var
, NULL
);
5954 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5956 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
5958 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5963 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
5965 if (!useless_type_conversion_p (vectype
, rettype
))
5967 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
5968 == TYPE_VECTOR_SUBPARTS (rettype
));
5969 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
5970 op
= make_ssa_name (var
, new_stmt
);
5971 gimple_call_set_lhs (new_stmt
, op
);
5972 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5973 var
= make_ssa_name (vec_dest
, NULL
);
5974 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
5976 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
5981 var
= make_ssa_name (vec_dest
, new_stmt
);
5982 gimple_call_set_lhs (new_stmt
, var
);
5985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5987 if (modifier
== NARROW
)
5994 var
= permute_vec_elements (prev_res
, var
,
5995 perm_mask
, stmt
, gsi
);
5996 new_stmt
= SSA_NAME_DEF_STMT (var
);
5999 if (prev_stmt_info
== NULL
)
6000 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6002 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6003 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6007 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
6009 gimple_stmt_iterator incr_gsi
;
6015 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6016 gimple_seq stmts
= NULL
;
6017 tree stride_base
, stride_step
, alias_off
;
6019 gcc_assert (!nested_in_vect_loop
);
6022 = fold_build_pointer_plus
6023 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6024 size_binop (PLUS_EXPR
,
6025 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6026 convert_to_ptrofftype (DR_INIT (dr
))));
6027 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6029 /* For a load with loop-invariant (but other than power-of-2)
6030 stride (i.e. not a grouped access) like so:
6032 for (i = 0; i < n; i += stride)
6035 we generate a new induction variable and new accesses to
6036 form a new vector (or vectors, depending on ncopies):
6038 for (j = 0; ; j += VF*stride)
6040 tmp2 = array[j + stride];
6042 vectemp = {tmp1, tmp2, ...}
6045 ivstep
= stride_step
;
6046 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6047 build_int_cst (TREE_TYPE (ivstep
), vf
));
6049 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6051 create_iv (stride_base
, ivstep
, NULL
,
6052 loop
, &incr_gsi
, insert_after
,
6054 incr
= gsi_stmt (incr_gsi
);
6055 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6057 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6059 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6061 prev_stmt_info
= NULL
;
6062 running_off
= offvar
;
6063 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6064 for (j
= 0; j
< ncopies
; j
++)
6068 vec_alloc (v
, nunits
);
6069 for (i
= 0; i
< nunits
; i
++)
6071 tree newref
, newoff
;
6073 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
6074 running_off
, alias_off
);
6076 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6079 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6080 newoff
= copy_ssa_name (running_off
, NULL
);
6081 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
6082 running_off
, stride_step
);
6083 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6085 running_off
= newoff
;
6088 vec_inv
= build_constructor (vectype
, v
);
6089 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6090 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6093 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6095 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6096 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6103 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6105 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6106 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6107 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6109 /* Check if the chain of loads is already vectorized. */
6110 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6111 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6112 ??? But we can only do so if there is exactly one
6113 as we have no way to get at the rest. Leave the CSE
6115 ??? With the group load eventually participating
6116 in multiple different permutations (having multiple
6117 slp nodes which refer to the same group) the CSE
6118 is even wrong code. See PR56270. */
6121 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6124 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6125 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6127 /* VEC_NUM is the number of vect stmts to be created for this group. */
6130 grouped_load
= false;
6131 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6132 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6134 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6138 vec_num
= group_size
;
6146 group_size
= vec_num
= 1;
6150 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6151 gcc_assert (alignment_support_scheme
);
6152 /* Targets with load-lane instructions must not require explicit
6154 gcc_assert (!load_lanes_p
6155 || alignment_support_scheme
== dr_aligned
6156 || alignment_support_scheme
== dr_unaligned_supported
);
6158 /* In case the vectorization factor (VF) is bigger than the number
6159 of elements that we can fit in a vectype (nunits), we have to generate
6160 more than one vector stmt - i.e - we need to "unroll" the
6161 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6162 from one copy of the vector stmt to the next, in the field
6163 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6164 stages to find the correct vector defs to be used when vectorizing
6165 stmts that use the defs of the current stmt. The example below
6166 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6167 need to create 4 vectorized stmts):
6169 before vectorization:
6170 RELATED_STMT VEC_STMT
6174 step 1: vectorize stmt S1:
6175 We first create the vector stmt VS1_0, and, as usual, record a
6176 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6177 Next, we create the vector stmt VS1_1, and record a pointer to
6178 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6179 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6181 RELATED_STMT VEC_STMT
6182 VS1_0: vx0 = memref0 VS1_1 -
6183 VS1_1: vx1 = memref1 VS1_2 -
6184 VS1_2: vx2 = memref2 VS1_3 -
6185 VS1_3: vx3 = memref3 - -
6186 S1: x = load - VS1_0
6189 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6190 information we recorded in RELATED_STMT field is used to vectorize
6193 /* In case of interleaving (non-unit grouped access):
6200 Vectorized loads are created in the order of memory accesses
6201 starting from the access of the first stmt of the chain:
6204 VS2: vx1 = &base + vec_size*1
6205 VS3: vx3 = &base + vec_size*2
6206 VS4: vx4 = &base + vec_size*3
6208 Then permutation statements are generated:
6210 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6211 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6214 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6215 (the order of the data-refs in the output of vect_permute_load_chain
6216 corresponds to the order of scalar stmts in the interleaving chain - see
6217 the documentation of vect_permute_load_chain()).
6218 The generation of permutation stmts and recording them in
6219 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6221 In case of both multiple types and interleaving, the vector loads and
6222 permutation stmts above are created for every copy. The result vector
6223 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6224 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6226 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6227 on a target that supports unaligned accesses (dr_unaligned_supported)
6228 we generate the following code:
6232 p = p + indx * vectype_size;
6237 Otherwise, the data reference is potentially unaligned on a target that
6238 does not support unaligned accesses (dr_explicit_realign_optimized) -
6239 then generate the following code, in which the data in each iteration is
6240 obtained by two vector loads, one from the previous iteration, and one
6241 from the current iteration:
6243 msq_init = *(floor(p1))
6244 p2 = initial_addr + VS - 1;
6245 realignment_token = call target_builtin;
6248 p2 = p2 + indx * vectype_size
6250 vec_dest = realign_load (msq, lsq, realignment_token)
6255 /* If the misalignment remains the same throughout the execution of the
6256 loop, we can create the init_addr and permutation mask at the loop
6257 preheader. Otherwise, it needs to be created inside the loop.
6258 This can only occur when vectorizing memory accesses in the inner-loop
6259 nested within an outer-loop that is being vectorized. */
6261 if (nested_in_vect_loop
6262 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6263 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6265 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6266 compute_in_loop
= true;
6269 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6270 || alignment_support_scheme
== dr_explicit_realign
)
6271 && !compute_in_loop
)
6273 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6274 alignment_support_scheme
, NULL_TREE
,
6276 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6278 phi
= SSA_NAME_DEF_STMT (msq
);
6279 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6287 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6290 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6292 aggr_type
= vectype
;
6294 prev_stmt_info
= NULL
;
6295 for (j
= 0; j
< ncopies
; j
++)
6297 /* 1. Create the vector or array pointer update chain. */
6300 bool simd_lane_access_p
6301 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6302 if (simd_lane_access_p
6303 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6304 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6305 && integer_zerop (DR_OFFSET (first_dr
))
6306 && integer_zerop (DR_INIT (first_dr
))
6307 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6308 get_alias_set (DR_REF (first_dr
)))
6309 && (alignment_support_scheme
== dr_aligned
6310 || alignment_support_scheme
== dr_unaligned_supported
))
6312 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6313 dataref_offset
= build_int_cst (reference_alias_ptr_type
6314 (DR_REF (first_dr
)), 0);
6319 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6320 offset
, &dummy
, gsi
, &ptr_incr
,
6321 simd_lane_access_p
, &inv_p
,
6324 else if (dataref_offset
)
6325 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6326 TYPE_SIZE_UNIT (aggr_type
));
6328 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6329 TYPE_SIZE_UNIT (aggr_type
));
6331 if (grouped_load
|| slp_perm
)
6332 dr_chain
.create (vec_num
);
6338 vec_array
= create_vector_array (vectype
, vec_num
);
6341 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6342 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6343 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6344 gimple_call_set_lhs (new_stmt
, vec_array
);
6345 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6347 /* Extract each vector into an SSA_NAME. */
6348 for (i
= 0; i
< vec_num
; i
++)
6350 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6352 dr_chain
.quick_push (new_temp
);
6355 /* Record the mapping between SSA_NAMEs and statements. */
6356 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6360 for (i
= 0; i
< vec_num
; i
++)
6363 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6366 /* 2. Create the vector-load in the loop. */
6367 switch (alignment_support_scheme
)
6370 case dr_unaligned_supported
:
6372 unsigned int align
, misalign
;
6375 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6378 : build_int_cst (reference_alias_ptr_type
6379 (DR_REF (first_dr
)), 0));
6380 align
= TYPE_ALIGN_UNIT (vectype
);
6381 if (alignment_support_scheme
== dr_aligned
)
6383 gcc_assert (aligned_access_p (first_dr
));
6386 else if (DR_MISALIGNMENT (first_dr
) == -1)
6388 TREE_TYPE (data_ref
)
6389 = build_aligned_type (TREE_TYPE (data_ref
),
6390 TYPE_ALIGN (elem_type
));
6391 align
= TYPE_ALIGN_UNIT (elem_type
);
6396 TREE_TYPE (data_ref
)
6397 = build_aligned_type (TREE_TYPE (data_ref
),
6398 TYPE_ALIGN (elem_type
));
6399 misalign
= DR_MISALIGNMENT (first_dr
);
6401 if (dataref_offset
== NULL_TREE
)
6402 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6406 case dr_explicit_realign
:
6411 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6413 if (compute_in_loop
)
6414 msq
= vect_setup_realignment (first_stmt
, gsi
,
6416 dr_explicit_realign
,
6419 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
6420 new_stmt
= gimple_build_assign_with_ops
6421 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
6423 (TREE_TYPE (dataref_ptr
),
6424 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6425 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6427 = build2 (MEM_REF
, vectype
, ptr
,
6428 build_int_cst (reference_alias_ptr_type
6429 (DR_REF (first_dr
)), 0));
6430 vec_dest
= vect_create_destination_var (scalar_dest
,
6432 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6433 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6434 gimple_assign_set_lhs (new_stmt
, new_temp
);
6435 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6436 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6437 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6440 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6441 TYPE_SIZE_UNIT (elem_type
));
6442 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6443 new_stmt
= gimple_build_assign_with_ops
6444 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
6447 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6448 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6449 gimple_assign_set_lhs (new_stmt
, ptr
);
6450 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6452 = build2 (MEM_REF
, vectype
, ptr
,
6453 build_int_cst (reference_alias_ptr_type
6454 (DR_REF (first_dr
)), 0));
6457 case dr_explicit_realign_optimized
:
6458 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
6459 new_stmt
= gimple_build_assign_with_ops
6460 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
6462 (TREE_TYPE (dataref_ptr
),
6463 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6464 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6466 = build2 (MEM_REF
, vectype
, new_temp
,
6467 build_int_cst (reference_alias_ptr_type
6468 (DR_REF (first_dr
)), 0));
6473 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6474 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6475 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6476 gimple_assign_set_lhs (new_stmt
, new_temp
);
6477 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6479 /* 3. Handle explicit realignment if necessary/supported.
6481 vec_dest = realign_load (msq, lsq, realignment_token) */
6482 if (alignment_support_scheme
== dr_explicit_realign_optimized
6483 || alignment_support_scheme
== dr_explicit_realign
)
6485 lsq
= gimple_assign_lhs (new_stmt
);
6486 if (!realignment_token
)
6487 realignment_token
= dataref_ptr
;
6488 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6490 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
6493 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6494 gimple_assign_set_lhs (new_stmt
, new_temp
);
6495 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6497 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6500 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6501 add_phi_arg (phi
, lsq
,
6502 loop_latch_edge (containing_loop
),
6508 /* 4. Handle invariant-load. */
6509 if (inv_p
&& !bb_vinfo
)
6511 gcc_assert (!grouped_load
);
6512 /* If we have versioned for aliasing or the loop doesn't
6513 have any data dependencies that would preclude this,
6514 then we are sure this is a loop invariant load and
6515 thus we can insert it on the preheader edge. */
6516 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6517 && !nested_in_vect_loop
6518 && hoist_defs_of_uses (stmt
, loop
))
6520 if (dump_enabled_p ())
6522 dump_printf_loc (MSG_NOTE
, vect_location
,
6523 "hoisting out of the vectorized "
6525 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6526 dump_printf (MSG_NOTE
, "\n");
6528 tree tem
= copy_ssa_name (scalar_dest
, NULL
);
6529 gsi_insert_on_edge_immediate
6530 (loop_preheader_edge (loop
),
6531 gimple_build_assign (tem
,
6533 (gimple_assign_rhs1 (stmt
))));
6534 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6538 gimple_stmt_iterator gsi2
= *gsi
;
6540 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6543 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6544 set_vinfo_for_stmt (new_stmt
,
6545 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6551 tree perm_mask
= perm_mask_for_reverse (vectype
);
6552 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6553 perm_mask
, stmt
, gsi
);
6554 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6557 /* Collect vector loads and later create their permutation in
6558 vect_transform_grouped_load (). */
6559 if (grouped_load
|| slp_perm
)
6560 dr_chain
.quick_push (new_temp
);
6562 /* Store vector loads in the corresponding SLP_NODE. */
6563 if (slp
&& !slp_perm
)
6564 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6566 /* Bump the vector pointer to account for a gap. */
6567 if (slp
&& group_gap
!= 0)
6569 tree bump
= size_binop (MULT_EXPR
,
6570 TYPE_SIZE_UNIT (elem_type
),
6571 size_int (group_gap
));
6572 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6577 if (slp
&& !slp_perm
)
6582 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6583 slp_node_instance
, false))
6585 dr_chain
.release ();
6594 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6595 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6600 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6602 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6603 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6606 dr_chain
.release ();
6612 /* Function vect_is_simple_cond.
6615 LOOP - the loop that is being vectorized.
6616 COND - Condition that is checked for simple use.
6619 *COMP_VECTYPE - the vector type for the comparison.
6621 Returns whether a COND can be vectorized. Checks whether
6622 condition operands are supportable using vec_is_simple_use. */
6625 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6626 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6630 enum vect_def_type dt
;
6631 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
6633 if (!COMPARISON_CLASS_P (cond
))
6636 lhs
= TREE_OPERAND (cond
, 0);
6637 rhs
= TREE_OPERAND (cond
, 1);
6639 if (TREE_CODE (lhs
) == SSA_NAME
)
6641 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6642 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6643 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
6646 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6647 && TREE_CODE (lhs
) != FIXED_CST
)
6650 if (TREE_CODE (rhs
) == SSA_NAME
)
6652 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6653 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6654 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6657 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6658 && TREE_CODE (rhs
) != FIXED_CST
)
6661 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6665 /* vectorizable_condition.
6667 Check if STMT is conditional modify expression that can be vectorized.
6668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6669 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6672 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6673 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6674 else caluse if it is 2).
6676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6679 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6680 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6683 tree scalar_dest
= NULL_TREE
;
6684 tree vec_dest
= NULL_TREE
;
6685 tree cond_expr
, then_clause
, else_clause
;
6686 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6687 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6688 tree comp_vectype
= NULL_TREE
;
6689 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6690 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6691 tree vec_compare
, vec_cond_expr
;
6693 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6695 enum vect_def_type dt
, dts
[4];
6696 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6698 enum tree_code code
;
6699 stmt_vec_info prev_stmt_info
= NULL
;
6701 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6702 vec
<tree
> vec_oprnds0
= vNULL
;
6703 vec
<tree
> vec_oprnds1
= vNULL
;
6704 vec
<tree
> vec_oprnds2
= vNULL
;
6705 vec
<tree
> vec_oprnds3
= vNULL
;
6708 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6711 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6713 gcc_assert (ncopies
>= 1);
6714 if (reduc_index
&& ncopies
> 1)
6715 return false; /* FORNOW */
6717 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6720 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6723 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6724 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6728 /* FORNOW: not yet supported. */
6729 if (STMT_VINFO_LIVE_P (stmt_info
))
6731 if (dump_enabled_p ())
6732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6733 "value used after loop.\n");
6737 /* Is vectorizable conditional operation? */
6738 if (!is_gimple_assign (stmt
))
6741 code
= gimple_assign_rhs_code (stmt
);
6743 if (code
!= COND_EXPR
)
6746 cond_expr
= gimple_assign_rhs1 (stmt
);
6747 then_clause
= gimple_assign_rhs2 (stmt
);
6748 else_clause
= gimple_assign_rhs3 (stmt
);
6750 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
6755 if (TREE_CODE (then_clause
) == SSA_NAME
)
6757 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6758 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6759 &then_def_stmt
, &def
, &dt
))
6762 else if (TREE_CODE (then_clause
) != INTEGER_CST
6763 && TREE_CODE (then_clause
) != REAL_CST
6764 && TREE_CODE (then_clause
) != FIXED_CST
)
6767 if (TREE_CODE (else_clause
) == SSA_NAME
)
6769 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6770 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6771 &else_def_stmt
, &def
, &dt
))
6774 else if (TREE_CODE (else_clause
) != INTEGER_CST
6775 && TREE_CODE (else_clause
) != REAL_CST
6776 && TREE_CODE (else_clause
) != FIXED_CST
)
6779 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
6780 /* The result of a vector comparison should be signed type. */
6781 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
6782 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
6783 if (vec_cmp_type
== NULL_TREE
)
6788 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6789 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
6796 vec_oprnds0
.create (1);
6797 vec_oprnds1
.create (1);
6798 vec_oprnds2
.create (1);
6799 vec_oprnds3
.create (1);
6803 scalar_dest
= gimple_assign_lhs (stmt
);
6804 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6806 /* Handle cond expr. */
6807 for (j
= 0; j
< ncopies
; j
++)
6809 gimple new_stmt
= NULL
;
6814 auto_vec
<tree
, 4> ops
;
6815 auto_vec
<vec
<tree
>, 4> vec_defs
;
6817 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
6818 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
6819 ops
.safe_push (then_clause
);
6820 ops
.safe_push (else_clause
);
6821 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
6822 vec_oprnds3
= vec_defs
.pop ();
6823 vec_oprnds2
= vec_defs
.pop ();
6824 vec_oprnds1
= vec_defs
.pop ();
6825 vec_oprnds0
= vec_defs
.pop ();
6828 vec_defs
.release ();
6834 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
6836 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
6837 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
6840 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
6842 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
6843 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
6844 if (reduc_index
== 1)
6845 vec_then_clause
= reduc_def
;
6848 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
6850 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
6851 NULL
, >emp
, &def
, &dts
[2]);
6853 if (reduc_index
== 2)
6854 vec_else_clause
= reduc_def
;
6857 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
6859 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
6860 NULL
, >emp
, &def
, &dts
[3]);
6866 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
6867 vec_oprnds0
.pop ());
6868 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
6869 vec_oprnds1
.pop ());
6870 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
6871 vec_oprnds2
.pop ());
6872 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
6873 vec_oprnds3
.pop ());
6878 vec_oprnds0
.quick_push (vec_cond_lhs
);
6879 vec_oprnds1
.quick_push (vec_cond_rhs
);
6880 vec_oprnds2
.quick_push (vec_then_clause
);
6881 vec_oprnds3
.quick_push (vec_else_clause
);
6884 /* Arguments are ready. Create the new vector stmt. */
6885 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
6887 vec_cond_rhs
= vec_oprnds1
[i
];
6888 vec_then_clause
= vec_oprnds2
[i
];
6889 vec_else_clause
= vec_oprnds3
[i
];
6891 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
6892 vec_cond_lhs
, vec_cond_rhs
);
6893 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6894 vec_compare
, vec_then_clause
, vec_else_clause
);
6896 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6897 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6898 gimple_assign_set_lhs (new_stmt
, new_temp
);
6899 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6901 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6908 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6910 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6912 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6915 vec_oprnds0
.release ();
6916 vec_oprnds1
.release ();
6917 vec_oprnds2
.release ();
6918 vec_oprnds3
.release ();
6924 /* Make sure the statement is vectorizable. */
6927 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
6929 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6930 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6931 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
6933 tree scalar_type
, vectype
;
6934 gimple pattern_stmt
;
6935 gimple_seq pattern_def_seq
;
6937 if (dump_enabled_p ())
6939 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
6940 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6941 dump_printf (MSG_NOTE
, "\n");
6944 if (gimple_has_volatile_ops (stmt
))
6946 if (dump_enabled_p ())
6947 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6948 "not vectorized: stmt has volatile operands\n");
6953 /* Skip stmts that do not need to be vectorized. In loops this is expected
6955 - the COND_EXPR which is the loop exit condition
6956 - any LABEL_EXPRs in the loop
6957 - computations that are used only for array indexing or loop control.
6958 In basic blocks we only analyze statements that are a part of some SLP
6959 instance, therefore, all the statements are relevant.
6961 Pattern statement needs to be analyzed instead of the original statement
6962 if the original statement is not relevant. Otherwise, we analyze both
6963 statements. In basic blocks we are called from some SLP instance
6964 traversal, don't analyze pattern stmts instead, the pattern stmts
6965 already will be part of SLP instance. */
6967 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
6968 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
6969 && !STMT_VINFO_LIVE_P (stmt_info
))
6971 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6973 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6974 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6976 /* Analyze PATTERN_STMT instead of the original stmt. */
6977 stmt
= pattern_stmt
;
6978 stmt_info
= vinfo_for_stmt (pattern_stmt
);
6979 if (dump_enabled_p ())
6981 dump_printf_loc (MSG_NOTE
, vect_location
,
6982 "==> examining pattern statement: ");
6983 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6984 dump_printf (MSG_NOTE
, "\n");
6989 if (dump_enabled_p ())
6990 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
6995 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6998 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6999 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7001 /* Analyze PATTERN_STMT too. */
7002 if (dump_enabled_p ())
7004 dump_printf_loc (MSG_NOTE
, vect_location
,
7005 "==> examining pattern statement: ");
7006 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7007 dump_printf (MSG_NOTE
, "\n");
7010 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7014 if (is_pattern_stmt_p (stmt_info
)
7016 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7018 gimple_stmt_iterator si
;
7020 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7022 gimple pattern_def_stmt
= gsi_stmt (si
);
7023 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7024 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7026 /* Analyze def stmt of STMT if it's a pattern stmt. */
7027 if (dump_enabled_p ())
7029 dump_printf_loc (MSG_NOTE
, vect_location
,
7030 "==> examining pattern def statement: ");
7031 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7032 dump_printf (MSG_NOTE
, "\n");
7035 if (!vect_analyze_stmt (pattern_def_stmt
,
7036 need_to_vectorize
, node
))
7042 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7044 case vect_internal_def
:
7047 case vect_reduction_def
:
7048 case vect_nested_cycle
:
7049 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7050 || relevance
== vect_used_in_outer_by_reduction
7051 || relevance
== vect_unused_in_scope
));
7054 case vect_induction_def
:
7055 case vect_constant_def
:
7056 case vect_external_def
:
7057 case vect_unknown_def_type
:
7064 gcc_assert (PURE_SLP_STMT (stmt_info
));
7066 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7067 if (dump_enabled_p ())
7069 dump_printf_loc (MSG_NOTE
, vect_location
,
7070 "get vectype for scalar type: ");
7071 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7072 dump_printf (MSG_NOTE
, "\n");
7075 vectype
= get_vectype_for_scalar_type (scalar_type
);
7078 if (dump_enabled_p ())
7080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7081 "not SLPed: unsupported data-type ");
7082 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7084 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7089 if (dump_enabled_p ())
7091 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7092 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7093 dump_printf (MSG_NOTE
, "\n");
7096 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7099 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7101 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7102 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7103 || (is_gimple_call (stmt
)
7104 && gimple_call_lhs (stmt
) == NULL_TREE
));
7105 *need_to_vectorize
= true;
7110 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7111 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7112 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7113 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7114 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7115 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7116 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7117 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7118 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7119 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7120 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7121 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7125 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7126 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7127 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7128 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7129 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7130 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7131 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7132 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7133 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7138 if (dump_enabled_p ())
7140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7141 "not vectorized: relevant stmt not ");
7142 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7143 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7144 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7153 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7154 need extra handling, except for vectorizable reductions. */
7155 if (STMT_VINFO_LIVE_P (stmt_info
)
7156 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7157 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7161 if (dump_enabled_p ())
7163 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7164 "not vectorized: live stmt not ");
7165 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7166 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7167 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7177 /* Function vect_transform_stmt.
7179 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7182 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7183 bool *grouped_store
, slp_tree slp_node
,
7184 slp_instance slp_node_instance
)
7186 bool is_store
= false;
7187 gimple vec_stmt
= NULL
;
7188 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7191 switch (STMT_VINFO_TYPE (stmt_info
))
7193 case type_demotion_vec_info_type
:
7194 case type_promotion_vec_info_type
:
7195 case type_conversion_vec_info_type
:
7196 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7200 case induc_vec_info_type
:
7201 gcc_assert (!slp_node
);
7202 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7206 case shift_vec_info_type
:
7207 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7211 case op_vec_info_type
:
7212 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7216 case assignment_vec_info_type
:
7217 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7221 case load_vec_info_type
:
7222 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7227 case store_vec_info_type
:
7228 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7230 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7232 /* In case of interleaving, the whole chain is vectorized when the
7233 last store in the chain is reached. Store stmts before the last
7234 one are skipped, and there vec_stmt_info shouldn't be freed
7236 *grouped_store
= true;
7237 if (STMT_VINFO_VEC_STMT (stmt_info
))
7244 case condition_vec_info_type
:
7245 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7249 case call_vec_info_type
:
7250 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7251 stmt
= gsi_stmt (*gsi
);
7252 if (is_gimple_call (stmt
)
7253 && gimple_call_internal_p (stmt
)
7254 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7258 case call_simd_clone_vec_info_type
:
7259 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7260 stmt
= gsi_stmt (*gsi
);
7263 case reduc_vec_info_type
:
7264 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7269 if (!STMT_VINFO_LIVE_P (stmt_info
))
7271 if (dump_enabled_p ())
7272 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7273 "stmt not supported.\n");
7278 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7279 is being vectorized, but outside the immediately enclosing loop. */
7281 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7282 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7283 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7284 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7285 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7286 || STMT_VINFO_RELEVANT (stmt_info
) ==
7287 vect_used_in_outer_by_reduction
))
7289 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7290 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7291 imm_use_iterator imm_iter
;
7292 use_operand_p use_p
;
7296 if (dump_enabled_p ())
7297 dump_printf_loc (MSG_NOTE
, vect_location
,
7298 "Record the vdef for outer-loop vectorization.\n");
7300 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7301 (to be used when vectorizing outer-loop stmts that use the DEF of
7303 if (gimple_code (stmt
) == GIMPLE_PHI
)
7304 scalar_dest
= PHI_RESULT (stmt
);
7306 scalar_dest
= gimple_assign_lhs (stmt
);
7308 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7310 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7312 exit_phi
= USE_STMT (use_p
);
7313 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7318 /* Handle stmts whose DEF is used outside the loop-nest that is
7319 being vectorized. */
7320 if (STMT_VINFO_LIVE_P (stmt_info
)
7321 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7323 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7328 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7334 /* Remove a group of stores (for SLP or interleaving), free their
7338 vect_remove_stores (gimple first_stmt
)
7340 gimple next
= first_stmt
;
7342 gimple_stmt_iterator next_si
;
7346 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7348 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7349 if (is_pattern_stmt_p (stmt_info
))
7350 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7351 /* Free the attached stmt_vec_info and remove the stmt. */
7352 next_si
= gsi_for_stmt (next
);
7353 unlink_stmt_vdef (next
);
7354 gsi_remove (&next_si
, true);
7355 release_defs (next
);
7356 free_stmt_vec_info (next
);
7362 /* Function new_stmt_vec_info.
7364 Create and initialize a new stmt_vec_info struct for STMT. */
7367 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7368 bb_vec_info bb_vinfo
)
7371 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7373 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7374 STMT_VINFO_STMT (res
) = stmt
;
7375 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7376 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7377 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7378 STMT_VINFO_LIVE_P (res
) = false;
7379 STMT_VINFO_VECTYPE (res
) = NULL
;
7380 STMT_VINFO_VEC_STMT (res
) = NULL
;
7381 STMT_VINFO_VECTORIZABLE (res
) = true;
7382 STMT_VINFO_IN_PATTERN_P (res
) = false;
7383 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7384 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7385 STMT_VINFO_DATA_REF (res
) = NULL
;
7387 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7388 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7389 STMT_VINFO_DR_INIT (res
) = NULL
;
7390 STMT_VINFO_DR_STEP (res
) = NULL
;
7391 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7393 if (gimple_code (stmt
) == GIMPLE_PHI
7394 && is_loop_header_bb_p (gimple_bb (stmt
)))
7395 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7397 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7399 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7400 STMT_SLP_TYPE (res
) = loop_vect
;
7401 GROUP_FIRST_ELEMENT (res
) = NULL
;
7402 GROUP_NEXT_ELEMENT (res
) = NULL
;
7403 GROUP_SIZE (res
) = 0;
7404 GROUP_STORE_COUNT (res
) = 0;
7405 GROUP_GAP (res
) = 0;
7406 GROUP_SAME_DR_STMT (res
) = NULL
;
7412 /* Create a hash table for stmt_vec_info. */
7415 init_stmt_vec_info_vec (void)
7417 gcc_assert (!stmt_vec_info_vec
.exists ());
7418 stmt_vec_info_vec
.create (50);
7422 /* Free hash table for stmt_vec_info. */
7425 free_stmt_vec_info_vec (void)
7429 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7431 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7432 gcc_assert (stmt_vec_info_vec
.exists ());
7433 stmt_vec_info_vec
.release ();
7437 /* Free stmt vectorization related info. */
7440 free_stmt_vec_info (gimple stmt
)
7442 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7447 /* Check if this statement has a related "pattern stmt"
7448 (introduced by the vectorizer during the pattern recognition
7449 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7451 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7453 stmt_vec_info patt_info
7454 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7457 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7458 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7459 gimple_set_bb (patt_stmt
, NULL
);
7460 tree lhs
= gimple_get_lhs (patt_stmt
);
7461 if (TREE_CODE (lhs
) == SSA_NAME
)
7462 release_ssa_name (lhs
);
7465 gimple_stmt_iterator si
;
7466 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7468 gimple seq_stmt
= gsi_stmt (si
);
7469 gimple_set_bb (seq_stmt
, NULL
);
7470 lhs
= gimple_get_lhs (patt_stmt
);
7471 if (TREE_CODE (lhs
) == SSA_NAME
)
7472 release_ssa_name (lhs
);
7473 free_stmt_vec_info (seq_stmt
);
7476 free_stmt_vec_info (patt_stmt
);
7480 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7481 set_vinfo_for_stmt (stmt
, NULL
);
7486 /* Function get_vectype_for_scalar_type_and_size.
7488 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7492 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7494 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7495 machine_mode simd_mode
;
7496 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7503 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7504 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7507 /* For vector types of elements whose mode precision doesn't
7508 match their types precision we use a element type of mode
7509 precision. The vectorization routines will have to make sure
7510 they support the proper result truncation/extension.
7511 We also make sure to build vector types with INTEGER_TYPE
7512 component type only. */
7513 if (INTEGRAL_TYPE_P (scalar_type
)
7514 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7515 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7516 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7517 TYPE_UNSIGNED (scalar_type
));
7519 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7520 When the component mode passes the above test simply use a type
7521 corresponding to that mode. The theory is that any use that
7522 would cause problems with this will disable vectorization anyway. */
7523 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7524 && !INTEGRAL_TYPE_P (scalar_type
))
7525 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7527 /* We can't build a vector type of elements with alignment bigger than
7529 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7530 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7531 TYPE_UNSIGNED (scalar_type
));
7533 /* If we felt back to using the mode fail if there was
7534 no scalar type for it. */
7535 if (scalar_type
== NULL_TREE
)
7538 /* If no size was supplied use the mode the target prefers. Otherwise
7539 lookup a vector mode of the specified size. */
7541 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7543 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7544 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7548 vectype
= build_vector_type (scalar_type
, nunits
);
7550 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7551 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7557 unsigned int current_vector_size
;
7559 /* Function get_vectype_for_scalar_type.
7561 Returns the vector type corresponding to SCALAR_TYPE as supported
7565 get_vectype_for_scalar_type (tree scalar_type
)
7568 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7569 current_vector_size
);
7571 && current_vector_size
== 0)
7572 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7576 /* Function get_same_sized_vectype
7578 Returns a vector type corresponding to SCALAR_TYPE of size
7579 VECTOR_TYPE if supported by the target. */
7582 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7584 return get_vectype_for_scalar_type_and_size
7585 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7588 /* Function vect_is_simple_use.
7591 LOOP_VINFO - the vect info of the loop that is being vectorized.
7592 BB_VINFO - the vect info of the basic block that is being vectorized.
7593 OPERAND - operand of STMT in the loop or bb.
7594 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7596 Returns whether a stmt with OPERAND can be vectorized.
7597 For loops, supportable operands are constants, loop invariants, and operands
7598 that are defined by the current iteration of the loop. Unsupportable
7599 operands are those that are defined by a previous iteration of the loop (as
7600 is the case in reduction/induction computations).
7601 For basic blocks, supportable operands are constants and bb invariants.
7602 For now, operands defined outside the basic block are not supported. */
7605 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7606 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7607 tree
*def
, enum vect_def_type
*dt
)
7610 stmt_vec_info stmt_vinfo
;
7611 struct loop
*loop
= NULL
;
7614 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7619 if (dump_enabled_p ())
7621 dump_printf_loc (MSG_NOTE
, vect_location
,
7622 "vect_is_simple_use: operand ");
7623 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7624 dump_printf (MSG_NOTE
, "\n");
7627 if (CONSTANT_CLASS_P (operand
))
7629 *dt
= vect_constant_def
;
7633 if (is_gimple_min_invariant (operand
))
7636 *dt
= vect_external_def
;
7640 if (TREE_CODE (operand
) == PAREN_EXPR
)
7642 if (dump_enabled_p ())
7643 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7644 operand
= TREE_OPERAND (operand
, 0);
7647 if (TREE_CODE (operand
) != SSA_NAME
)
7649 if (dump_enabled_p ())
7650 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7655 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7656 if (*def_stmt
== NULL
)
7658 if (dump_enabled_p ())
7659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7664 if (dump_enabled_p ())
7666 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7667 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7668 dump_printf (MSG_NOTE
, "\n");
7671 /* Empty stmt is expected only in case of a function argument.
7672 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7673 if (gimple_nop_p (*def_stmt
))
7676 *dt
= vect_external_def
;
7680 bb
= gimple_bb (*def_stmt
);
7682 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7683 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7684 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7685 *dt
= vect_external_def
;
7688 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7689 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7692 if (*dt
== vect_unknown_def_type
7694 && *dt
== vect_double_reduction_def
7695 && gimple_code (stmt
) != GIMPLE_PHI
))
7697 if (dump_enabled_p ())
7698 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7699 "Unsupported pattern.\n");
7703 if (dump_enabled_p ())
7704 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
7706 switch (gimple_code (*def_stmt
))
7709 *def
= gimple_phi_result (*def_stmt
);
7713 *def
= gimple_assign_lhs (*def_stmt
);
7717 *def
= gimple_call_lhs (*def_stmt
);
7722 if (dump_enabled_p ())
7723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7724 "unsupported defining stmt:\n");
7731 /* Function vect_is_simple_use_1.
7733 Same as vect_is_simple_use_1 but also determines the vector operand
7734 type of OPERAND and stores it to *VECTYPE. If the definition of
7735 OPERAND is vect_uninitialized_def, vect_constant_def or
7736 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7737 is responsible to compute the best suited vector type for the
7741 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7742 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7743 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
7745 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
7749 /* Now get a vector type if the def is internal, otherwise supply
7750 NULL_TREE and leave it up to the caller to figure out a proper
7751 type for the use stmt. */
7752 if (*dt
== vect_internal_def
7753 || *dt
== vect_induction_def
7754 || *dt
== vect_reduction_def
7755 || *dt
== vect_double_reduction_def
7756 || *dt
== vect_nested_cycle
)
7758 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
7760 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7761 && !STMT_VINFO_RELEVANT (stmt_info
)
7762 && !STMT_VINFO_LIVE_P (stmt_info
))
7763 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7765 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7766 gcc_assert (*vectype
!= NULL_TREE
);
7768 else if (*dt
== vect_uninitialized_def
7769 || *dt
== vect_constant_def
7770 || *dt
== vect_external_def
)
7771 *vectype
= NULL_TREE
;
7779 /* Function supportable_widening_operation
7781 Check whether an operation represented by the code CODE is a
7782 widening operation that is supported by the target platform in
7783 vector form (i.e., when operating on arguments of type VECTYPE_IN
7784 producing a result of type VECTYPE_OUT).
7786 Widening operations we currently support are NOP (CONVERT), FLOAT
7787 and WIDEN_MULT. This function checks if these operations are supported
7788 by the target platform either directly (via vector tree-codes), or via
7792 - CODE1 and CODE2 are codes of vector operations to be used when
7793 vectorizing the operation, if available.
7794 - MULTI_STEP_CVT determines the number of required intermediate steps in
7795 case of multi-step conversion (like char->short->int - in that case
7796 MULTI_STEP_CVT will be 1).
7797 - INTERM_TYPES contains the intermediate type required to perform the
7798 widening operation (short in the above example). */
7801 supportable_widening_operation (enum tree_code code
, gimple stmt
,
7802 tree vectype_out
, tree vectype_in
,
7803 enum tree_code
*code1
, enum tree_code
*code2
,
7804 int *multi_step_cvt
,
7805 vec
<tree
> *interm_types
)
7807 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7808 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7809 struct loop
*vect_loop
= NULL
;
7810 machine_mode vec_mode
;
7811 enum insn_code icode1
, icode2
;
7812 optab optab1
, optab2
;
7813 tree vectype
= vectype_in
;
7814 tree wide_vectype
= vectype_out
;
7815 enum tree_code c1
, c2
;
7817 tree prev_type
, intermediate_type
;
7818 machine_mode intermediate_mode
, prev_mode
;
7819 optab optab3
, optab4
;
7821 *multi_step_cvt
= 0;
7823 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
7827 case WIDEN_MULT_EXPR
:
7828 /* The result of a vectorized widening operation usually requires
7829 two vectors (because the widened results do not fit into one vector).
7830 The generated vector results would normally be expected to be
7831 generated in the same order as in the original scalar computation,
7832 i.e. if 8 results are generated in each vector iteration, they are
7833 to be organized as follows:
7834 vect1: [res1,res2,res3,res4],
7835 vect2: [res5,res6,res7,res8].
7837 However, in the special case that the result of the widening
7838 operation is used in a reduction computation only, the order doesn't
7839 matter (because when vectorizing a reduction we change the order of
7840 the computation). Some targets can take advantage of this and
7841 generate more efficient code. For example, targets like Altivec,
7842 that support widen_mult using a sequence of {mult_even,mult_odd}
7843 generate the following vectors:
7844 vect1: [res1,res3,res5,res7],
7845 vect2: [res2,res4,res6,res8].
7847 When vectorizing outer-loops, we execute the inner-loop sequentially
7848 (each vectorized inner-loop iteration contributes to VF outer-loop
7849 iterations in parallel). We therefore don't allow to change the
7850 order of the computation in the inner-loop during outer-loop
7852 /* TODO: Another case in which order doesn't *really* matter is when we
7853 widen and then contract again, e.g. (short)((int)x * y >> 8).
7854 Normally, pack_trunc performs an even/odd permute, whereas the
7855 repack from an even/odd expansion would be an interleave, which
7856 would be significantly simpler for e.g. AVX2. */
7857 /* In any case, in order to avoid duplicating the code below, recurse
7858 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7859 are properly set up for the caller. If we fail, we'll continue with
7860 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7862 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
7863 && !nested_in_vect_loop_p (vect_loop
, stmt
)
7864 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
7865 stmt
, vectype_out
, vectype_in
,
7866 code1
, code2
, multi_step_cvt
,
7869 /* Elements in a vector with vect_used_by_reduction property cannot
7870 be reordered if the use chain with this property does not have the
7871 same operation. One such an example is s += a * b, where elements
7872 in a and b cannot be reordered. Here we check if the vector defined
7873 by STMT is only directly used in the reduction statement. */
7874 tree lhs
= gimple_assign_lhs (stmt
);
7875 use_operand_p dummy
;
7877 stmt_vec_info use_stmt_info
= NULL
;
7878 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
7879 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
7880 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
7883 c1
= VEC_WIDEN_MULT_LO_EXPR
;
7884 c2
= VEC_WIDEN_MULT_HI_EXPR
;
7887 case VEC_WIDEN_MULT_EVEN_EXPR
:
7888 /* Support the recursion induced just above. */
7889 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
7890 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
7893 case WIDEN_LSHIFT_EXPR
:
7894 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
7895 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
7899 c1
= VEC_UNPACK_LO_EXPR
;
7900 c2
= VEC_UNPACK_HI_EXPR
;
7904 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
7905 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
7908 case FIX_TRUNC_EXPR
:
7909 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7910 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7911 computing the operation. */
7918 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
7920 enum tree_code ctmp
= c1
;
7925 if (code
== FIX_TRUNC_EXPR
)
7927 /* The signedness is determined from output operand. */
7928 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7929 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
7933 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7934 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
7937 if (!optab1
|| !optab2
)
7940 vec_mode
= TYPE_MODE (vectype
);
7941 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
7942 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
7948 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7949 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7952 /* Check if it's a multi-step conversion that can be done using intermediate
7955 prev_type
= vectype
;
7956 prev_mode
= vec_mode
;
7958 if (!CONVERT_EXPR_CODE_P (code
))
7961 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7962 intermediate steps in promotion sequence. We try
7963 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
7965 interm_types
->create (MAX_INTERM_CVT_STEPS
);
7966 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
7968 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
7970 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
7971 TYPE_UNSIGNED (prev_type
));
7972 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
7973 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
7975 if (!optab3
|| !optab4
7976 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
7977 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
7978 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
7979 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
7980 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
7981 == CODE_FOR_nothing
)
7982 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
7983 == CODE_FOR_nothing
))
7986 interm_types
->quick_push (intermediate_type
);
7987 (*multi_step_cvt
)++;
7989 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7990 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7993 prev_type
= intermediate_type
;
7994 prev_mode
= intermediate_mode
;
7997 interm_types
->release ();
8002 /* Function supportable_narrowing_operation
8004 Check whether an operation represented by the code CODE is a
8005 narrowing operation that is supported by the target platform in
8006 vector form (i.e., when operating on arguments of type VECTYPE_IN
8007 and producing a result of type VECTYPE_OUT).
8009 Narrowing operations we currently support are NOP (CONVERT) and
8010 FIX_TRUNC. This function checks if these operations are supported by
8011 the target platform directly via vector tree-codes.
8014 - CODE1 is the code of a vector operation to be used when
8015 vectorizing the operation, if available.
8016 - MULTI_STEP_CVT determines the number of required intermediate steps in
8017 case of multi-step conversion (like int->short->char - in that case
8018 MULTI_STEP_CVT will be 1).
8019 - INTERM_TYPES contains the intermediate type required to perform the
8020 narrowing operation (short in the above example). */
8023 supportable_narrowing_operation (enum tree_code code
,
8024 tree vectype_out
, tree vectype_in
,
8025 enum tree_code
*code1
, int *multi_step_cvt
,
8026 vec
<tree
> *interm_types
)
8028 machine_mode vec_mode
;
8029 enum insn_code icode1
;
8030 optab optab1
, interm_optab
;
8031 tree vectype
= vectype_in
;
8032 tree narrow_vectype
= vectype_out
;
8034 tree intermediate_type
;
8035 machine_mode intermediate_mode
, prev_mode
;
8039 *multi_step_cvt
= 0;
8043 c1
= VEC_PACK_TRUNC_EXPR
;
8046 case FIX_TRUNC_EXPR
:
8047 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8051 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8052 tree code and optabs used for computing the operation. */
8059 if (code
== FIX_TRUNC_EXPR
)
8060 /* The signedness is determined from output operand. */
8061 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8063 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8068 vec_mode
= TYPE_MODE (vectype
);
8069 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8074 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8077 /* Check if it's a multi-step conversion that can be done using intermediate
8079 prev_mode
= vec_mode
;
8080 if (code
== FIX_TRUNC_EXPR
)
8081 uns
= TYPE_UNSIGNED (vectype_out
);
8083 uns
= TYPE_UNSIGNED (vectype
);
8085 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8086 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8087 costly than signed. */
8088 if (code
== FIX_TRUNC_EXPR
&& uns
)
8090 enum insn_code icode2
;
8093 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8095 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8096 if (interm_optab
!= unknown_optab
8097 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8098 && insn_data
[icode1
].operand
[0].mode
8099 == insn_data
[icode2
].operand
[0].mode
)
8102 optab1
= interm_optab
;
8107 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8108 intermediate steps in promotion sequence. We try
8109 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8110 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8111 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8113 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8115 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8117 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8120 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8121 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8122 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8123 == CODE_FOR_nothing
))
8126 interm_types
->quick_push (intermediate_type
);
8127 (*multi_step_cvt
)++;
8129 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8132 prev_mode
= intermediate_mode
;
8133 optab1
= interm_optab
;
8136 interm_types
->release ();