/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "plugin-api.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */
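/* Illustrative usage (assumed typical call shape, cf. the callers below):
     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);  */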
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
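/* For example, in "x = a[i]" the use of 'i' only serves to index the array
   reference; such a use is not considered relevant by itself (illustrative
   example).  */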
static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of the two
     supported load/store forms
     (this should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb: ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb: ...
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction): ...  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  gimple phi;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */
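/* For instance, a simple vector operation generated with ncopies == 2
   contributes two vector_stmt costs to the loop body (illustrative).  */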
static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
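/* For example, promoting chars to ints with 128-bit vectors is a two-step
   promotion, i.e. PWR == 1, so the loop below accumulates costs for both
   steps (illustrative).  */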
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
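      /* For example, interleaving a group of 4 stores costs
         ceil_log2 (4) * 4 = 8 permute statements per copy (illustrative).  */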
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
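      /* E.g. a group of 4 loads needs ceil_log2 (4) * 4 = 8 extract or
         shuffle statements per copy (illustrative).  */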
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0,
                                          vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
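/* For example, initializing a V4SI vector from the scalar constant 5 first
   builds the vector constant {5,5,5,5} and assigns it to a new "cst_"
   variable whose SSA name is returned (illustrative).  */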
tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, " def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0    VS1.1
                        VS1.1:  vx.1 = memref1    VS1.2
                        VS1.2:  vx.2 = memref2    VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */
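/* For instance, for a binary statement z = x + y, VEC_OPRNDS0 receives the
   vector defs for x and VEC_OPRNDS1 those for y (illustrative).  */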
void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */
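/* For example, on targets that provide one, a call to a math builtin such
   as sqrt may be mapped to its vector counterpart by this hook
   (illustrative).  */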
static tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);
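  /* For example, with a vectorization factor of 16 and a 4-element vector
     type, NCOPIES is 16 / 4 = 4 masked loads or stores (illustrative).  */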
  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return false;

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
                           &def_stmt, &def, &dt))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
                               &def_stmt, &def, &dt))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }
1886 if (STMT_VINFO_GATHER_P (stmt_info
))
1888 tree vec_oprnd0
= NULL_TREE
, op
;
1889 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1890 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1891 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1892 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1893 tree mask_perm_mask
= NULL_TREE
;
1894 edge pe
= loop_preheader_edge (loop
);
1897 enum { NARROW
, NONE
, WIDEN
} modifier
;
1898 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1900 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1901 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1902 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1903 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1904 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1905 scaletype
= TREE_VALUE (arglist
);
1906 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1907 && types_compatible_p (srctype
, masktype
));
1909 if (nunits
== gather_off_nunits
)
1911 else if (nunits
== gather_off_nunits
/ 2)
1913 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1916 for (i
= 0; i
< gather_off_nunits
; ++i
)
1917 sel
[i
] = i
| nunits
;
1919 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
1920 gcc_assert (perm_mask
!= NULL_TREE
);
1922 else if (nunits
== gather_off_nunits
* 2)
1924 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1927 for (i
= 0; i
< nunits
; ++i
)
1928 sel
[i
] = i
< gather_off_nunits
1929 ? i
: i
+ nunits
- gather_off_nunits
;
1931 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
1932 gcc_assert (perm_mask
!= NULL_TREE
);
1934 for (i
= 0; i
< nunits
; ++i
)
1935 sel
[i
] = i
| gather_off_nunits
;
1936 mask_perm_mask
= vect_gen_perm_mask (masktype
, sel
);
1937 gcc_assert (mask_perm_mask
!= NULL_TREE
);

  vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

  ptr = fold_convert (ptrtype, gather_base);
  if (!is_gimple_min_invariant (ptr))
    {
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  scale = build_int_cst (scaletype, gather_scale);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt, gsi);
      else if (j == 0)
	op = vec_oprnd0
	  = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
      else
	op = vec_oprnd0
	  = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
		      == TYPE_VECTOR_SUBPARTS (idxtype));
	  var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
	  var = make_ssa_name (var, NULL);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  new_stmt
	    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
					    NULL_TREE);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  op = var;
	}

      if (mask_perm_mask && (j & 1))
	mask_op = permute_vec_elements (mask_op, mask_op,
					mask_perm_mask, stmt, gsi);
      else
	{
	  if (j == 0)
	    vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	  else
	    {
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
				  &def_stmt, &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	    }

	  mask_op = vec_mask;
	  if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
			  == TYPE_VECTOR_SUBPARTS (masktype));
	      var = vect_get_new_vect_var (masktype, vect_simple_var,
					   NULL);
	      var = make_ssa_name (var, NULL);
	      mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
	      new_stmt
		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
						mask_op, NULL_TREE);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      mask_op = var;
	    }
	}

      new_stmt
	= gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
			     scale);

      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
		      == TYPE_VECTOR_SUBPARTS (rettype));
	  var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
	  op = make_ssa_name (var, new_stmt);
	  gimple_call_set_lhs (new_stmt, op);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  var = make_ssa_name (vec_dest, NULL);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  new_stmt
	    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
					    NULL_TREE);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, var);
	}

      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (modifier == NARROW)
	{
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (prev_res, var,
				      perm_mask, stmt, gsi);
	  new_stmt = SSA_NAME_DEF_STMT (var);
	}

      if (prev_stmt_info == NULL)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      tree rhs = gimple_call_arg (stmt, 3);
	      vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      /* We should have caught mismatched types earlier.  */
	      gcc_assert (useless_type_conversion_p (vectype,
						     TREE_TYPE (vec_rhs)));
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false,
						      &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
					  gimple_call_arg (stmt, 1),
					  vec_mask, vec_rhs);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false,
						      &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
					  gimple_call_arg (stmt, 1),
					  vec_mask);
	  gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}
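
/* Illustration only (not part of the original sources): a loop such as

     for (i = 0; i < n; i++)
       if (c[i])
	 a[i] = b[i];

   is if-converted into conditional MASK_LOAD/MASK_STORE internal calls,
   which the code above turns into IFN_MASK_LOAD/IFN_MASK_STORE on whole
   vectors, assuming the target provides masked load/store patterns.  */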

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
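
/* For illustration only (not from the original sources): a call such as

     for (i = 0; i < n; i++)
       x[i] = copysignf (y[i], z[i]);

   is vectorized here only if the target advertises a vector variant of the
   builtin (via its builtin_vectorized_function hook), in which case one
   vector call replaces nunits scalar calls per copy.  */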

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
					 slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
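
  /* Worked example (illustrative, the concrete modes are assumptions): with
     a V4SI input and V4SF output (4 lanes each) the modifier is NONE and a
     single vector call is emitted per copy; with a V2DF input and V4SF
     output (2 vs. 4 lanes) it is NARROW, so each vectorized call consumes
     two input vectors; the symmetric case yields WIDEN.  */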

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
			 "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	    }

	  if (gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
	    {
	      tree *v = XALLOCAVEC (tree, nunits_out);
	      int k;
	      for (k = 0; k < nunits_out; ++k)
		v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
	      tree cst = build_vector (vectype_out, v);
	      tree new_var
		= vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
	      gimple init_stmt = gimple_build_assign (new_var, cst);
	      new_temp = make_ssa_name (new_var, init_stmt);
	      gimple_assign_set_lhs (init_stmt, new_temp);
	      vect_init_vector_1 (stmt, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest, NULL);
	      new_stmt = gimple_build_assign (new_temp,
					      gimple_assign_lhs (init_stmt));
	    }
	  else
	    {
	      new_stmt = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, new_temp);
	    }
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}

struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
};

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
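
/* Illustrative example only (not part of the original sources): given

     #pragma omp declare simd simdlen(4) notinbranch
     float foo (float x, float y);

   a call to foo inside a vectorized loop can be replaced by a call to one
   of the generated clones (e.g. taking and returning V4SF), chosen below
   by comparing each clone's simdlen and argument kinds against the loop's
   vectorization factor.  */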

static bool
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
			      gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp, def;
  gimple def_stmt;
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &thisarginfo.dt,
				 &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  arginfo.release ();
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      if (thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && TREE_CODE (op) == SSA_NAME
	  && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
	  && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;

      arginfo.quick_push (thisarginfo);
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen
	    > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen
	    < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
	  this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		gcc_unreachable ();
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }

  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }
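
  /* Example of the scoring above (illustrative numbers, not from the
     original sources): with a vectorization factor of 8, a clone with
     simdlen 4 gets (log2 (8) - log2 (4)) * 1024 == 1024 added to its
     badness and an inbranch clone a further 2048, so an exactly matching
     notinbranch clone with simdlen 8 wins the comparison.  */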

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	arginfo[i].vectype
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
								     i)));
	if (arginfo[i].vectype == NULL
	    || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  {
	    arginfo.release ();
	    return false;
	  }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_simd_clone_call ===\n");
      /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / TYPE_VECTOR_SUBPARTS (atype);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (TYPE_VECTOR_SUBPARTS (atype)
		      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
		    {
		      unsigned int prec
			= GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
			   / TYPE_VECTOR_SUBPARTS (atype));
		      gcc_assert ((k & (k - 1)) == 0);
		      if (m == 0)
			vec_oprnd0
			  = vect_get_vec_def_for_operand (op, stmt, NULL);
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								vec_oprnd0);
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  size_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      new_stmt
			= gimple_build_assign (make_ssa_name (atype, NULL),
					       vec_oprnd0);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      k = (TYPE_VECTOR_SUBPARTS (atype)
			   / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
		      if (k != 1)
			vec_alloc (ctor_elts, k);
		      else
			ctor_elts = NULL;
		      for (l = 0; l < k; l++)
			{
			  if (m == 0 && l == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_operand (op, stmt, NULL);
			  else
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								arginfo[i].op);
			  arginfo[i].op = vec_oprnd0;
			  if (k == 1)
			    break;
			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
						  vec_oprnd0);
			}
		      if (k == 1)
			vargs.safe_push (vec_oprnd0);
		      else
			{
			  vec_oprnd0 = build_constructor (atype, ctor_elts);
			  new_stmt
			    = gimple_build_assign (make_ssa_name (atype, NULL),
						   vec_oprnd0);
			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
			  vargs.safe_push (gimple_assign_lhs (new_stmt));
			}
		    }
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      if (j == 0)
		{
		  gimple_seq stmts;
		  arginfo[i].op
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
					    NULL_TREE);
		  if (stmts != NULL)
		    {
		      basic_block new_bb;
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		    }
		  tree phi_res = copy_ssa_name (op, NULL);
		  gimple new_phi = create_phi_node (phi_res, loop->header);
		  set_vinfo_for_stmt (new_phi,
				      new_stmt_vec_info (new_phi, loop_vinfo,
							 NULL));
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       ncopies * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op, NULL);
		  new_stmt = gimple_build_assign_with_ops (code, phi_arg,
							   phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo,
							 NULL));
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
			       UNKNOWN_LOCATION);
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		}
	      else
		{
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       j * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op), NULL);
		  new_stmt
		    = gimple_build_assign_with_ops (code, new_temp,
						    arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vargs.safe_push (new_temp);
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    default:
	      gcc_unreachable ();
	    }
	}

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
	{
	  gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
	  if (ratype)
	    new_temp = create_tmp_var (ratype, NULL);
	  else if (TYPE_VECTOR_SUBPARTS (vectype)
		   == TYPE_VECTOR_SUBPARTS (rtype))
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	  else
	    new_temp = make_ssa_name (rtype, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	}
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
	{
	  if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
	    {
	      unsigned int k, l;
	      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		{
		  tree t;
		  if (ratype)
		    {
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t),
						 l * prec / BITS_PER_UNIT));
		    }
		  else
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				size_int (prec), bitsize_int (l * prec));
		  new_stmt
		    = gimple_build_assign (make_ssa_name (vectype, NULL), t);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}

	      if (ratype)
		{
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      continue;
	    }
	  else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
	    {
	      unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
				/ TYPE_VECTOR_SUBPARTS (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
	      if (ratype)
		{
		  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
		  for (m = 0; m < o; m++)
		    {
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
		      new_stmt
			= gimple_build_assign (make_ssa_name (rtype, NULL),
					       tem);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		    }
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
		continue;
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest, NULL),
				       vec_oprnd0);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	      continue;
	    }
	  else if (ratype)
	    {
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      tree clobber = build_constructor (ratype, NULL);
	      TREE_THIS_VOLATILE (clobber) = 1;
	      vect_finish_stmt_generation (stmt,
					   gimple_build_assign (new_temp,
								clobber),
					   gsi);
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
	lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  unlink_stmt_vdef (stmt);

  return true;
}

/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
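
/* For example (illustration only, not from the original sources): widening
   a V8HI multiplication into V4SI results typically uses a lo/hi pair of
   tree codes such as VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR;
   this helper emits one of those two halves per call.  */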

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}

/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
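
/* Illustration only (not from the original sources): a conversion loop

     for (i = 0; i < n; i++)
       d[i] = (double) f[i];

   with V4SF inputs and V2DF outputs is a WIDEN conversion, so each input
   vector is split by a hi/lo pair of vector FLOAT_EXPR statements; the
   opposite direction is a NARROW conversion handled by packing codes such
   as VEC_PACK_TRUNC_EXPR, possibly through intermediate types.  */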

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
							   vop0, NULL);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest, NULL);
		      new_stmt = gimple_build_assign_with_ops (codecvt1,
							       new_temp,
							       vop0, NULL);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest, NULL);
		    new_stmt = gimple_build_assign_with_ops (codecvt1,
							     new_temp,
							     vop0, NULL);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
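
/* Illustration only (not from the original sources): a copy such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   or a conversion that merely reinterprets the bits (e.g. int <-> unsigned)
   becomes a single vector assignment, possibly through VIEW_CONVERT_EXPR,
   as long as the number of lanes and the vector size do not change.  */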

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}

/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
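
/* Illustration only (not from the original sources): in

     for (i = 0; i < n; i++)
       a[i] = b[i] << s;

   the shift amount s is loop-invariant, so the vector-by-scalar shift optab
   can be used; with a[i] = b[i] << c[i] the amount varies per lane and the
   vector-by-vector optab must be used instead.  */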
4197 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4198 gimple
*vec_stmt
, slp_tree slp_node
)
4202 tree op0
, op1
= NULL
;
4203 tree vec_oprnd1
= NULL_TREE
;
4204 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4206 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4207 enum tree_code code
;
4208 machine_mode vec_mode
;
4212 machine_mode optab_op2_mode
;
4215 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4216 gimple new_stmt
= NULL
;
4217 stmt_vec_info prev_stmt_info
;
4224 vec
<tree
> vec_oprnds0
= vNULL
;
4225 vec
<tree
> vec_oprnds1
= vNULL
;
4228 bool scalar_shift_arg
= true;
4229 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4232 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4235 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4238 /* Is STMT a vectorizable binary/unary operation? */
4239 if (!is_gimple_assign (stmt
))
4242 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4245 code
= gimple_assign_rhs_code (stmt
);
4247 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4248 || code
== RROTATE_EXPR
))
4251 scalar_dest
= gimple_assign_lhs (stmt
);
4252 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4253 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4254 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4256 if (dump_enabled_p ())
4257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4258 "bit-precision shifts not supported.\n");
4262 op0
= gimple_assign_rhs1 (stmt
);
4263 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4264 &def_stmt
, &def
, &dt
[0], &vectype
))
4266 if (dump_enabled_p ())
4267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4268 "use not simple.\n");
4271 /* If op0 is an external or constant def use a vector type with
4272 the same size as the output vector type. */
4274 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4276 gcc_assert (vectype
);
4279 if (dump_enabled_p ())
4280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4281 "no vectype for scalar type\n");
4285 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4286 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4287 if (nunits_out
!= nunits_in
)
4290 op1
= gimple_assign_rhs2 (stmt
);
4291 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4292 &def
, &dt
[1], &op1_vectype
))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4296 "use not simple.\n");
4301 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4305   /* Multiple types in SLP are handled by creating the appropriate number of
4306      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
         case of SLP.  */
4308   if (slp_node || PURE_SLP_STMT (stmt_info))
4311     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4313   gcc_assert (ncopies >= 1);
4315   /* Determine whether the shift amount is a vector, or scalar.  If the
4316      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
4318 if (dt
[1] == vect_internal_def
&& !slp_node
)
4319 scalar_shift_arg
= false;
4320 else if (dt
[1] == vect_constant_def
4321 || dt
[1] == vect_external_def
4322 || dt
[1] == vect_internal_def
)
4324       /* In SLP, we need to check whether the shift count is the same
4325          in all the SLP stmts; in loops, if it is a constant or invariant,
             it is always a scalar shift.  */
4329           vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4332 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4333 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4334 scalar_shift_arg
= false;
4339 if (dump_enabled_p ())
4340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4341 "operand mode requires invariant argument.\n");
4345 /* Vector shifted by vector. */
4346 if (!scalar_shift_arg
)
4348 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE
, vect_location
,
4351 "vector/vector shift/rotate found.\n");
4354 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4355 if (op1_vectype
== NULL_TREE
4356 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4358 if (dump_enabled_p ())
4359 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4360 "unusable type for last operand in"
4361 " vector/vector shift/rotate.\n");
4365 /* See if the machine has a vector shifted by scalar insn and if not
4366 then see if it has a vector shifted by vector insn. */
4369 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4371 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4373 if (dump_enabled_p ())
4374 dump_printf_loc (MSG_NOTE
, vect_location
,
4375 "vector/scalar shift/rotate found.\n");
4379 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4381 && (optab_handler (optab
, TYPE_MODE (vectype
))
4382 != CODE_FOR_nothing
))
4384 scalar_shift_arg
= false;
4386 if (dump_enabled_p ())
4387 dump_printf_loc (MSG_NOTE
, vect_location
,
4388 "vector/vector shift/rotate found.\n");
4390 /* Unlike the other binary operators, shifts/rotates have
4391 the rhs being int, instead of the same type as the lhs,
4392 so make sure the scalar is the right type if we are
4393 dealing with vectors of long long/long/short/char. */
4394 if (dt
[1] == vect_constant_def
)
4395 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4396 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4400 && TYPE_MODE (TREE_TYPE (vectype
))
4401 != TYPE_MODE (TREE_TYPE (op1
)))
4403 if (dump_enabled_p ())
4404 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4405 "unusable type for last operand in"
4406 " vector/vector shift/rotate.\n");
4409 if (vec_stmt
&& !slp_node
)
4411 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4412 op1
= vect_init_vector (stmt
, op1
,
4413 TREE_TYPE (vectype
), NULL
);
4420 /* Supportable by target? */
4423 if (dump_enabled_p ())
4424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4428 vec_mode
= TYPE_MODE (vectype
);
4429 icode
= (int) optab_handler (optab
, vec_mode
);
4430 if (icode
== CODE_FOR_nothing
)
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4434 "op not supported by target.\n");
4435 /* Check only during analysis. */
4436 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4437 || (vf
< vect_min_worthwhile_factor (code
)
4440 if (dump_enabled_p ())
4441 dump_printf_loc (MSG_NOTE
, vect_location
,
4442 "proceeding using word mode.\n");
4445 /* Worthwhile without SIMD support? Check only during analysis. */
4446 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4447 && vf
< vect_min_worthwhile_factor (code
)
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4452 "not worthwhile without SIMD support.\n");
4456 if (!vec_stmt
) /* transformation not required. */
4458 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4459 if (dump_enabled_p ())
4460 dump_printf_loc (MSG_NOTE
, vect_location
,
4461 "=== vectorizable_shift ===\n");
4462 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4468 if (dump_enabled_p ())
4469 dump_printf_loc (MSG_NOTE
, vect_location
,
4470 "transform binary/unary operation.\n");
4473 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4475 prev_stmt_info
= NULL
;
4476 for (j
= 0; j
< ncopies
; j
++)
4481 if (scalar_shift_arg
)
4483           /* Vector shl and shr insn patterns can be defined with scalar
4484              operand 2 (shift operand).  In this case, use constant or loop
4485              invariant op1 directly, without extending it to vector mode
                 first.  */
4487           optab_op2_mode = insn_data[icode].operand[2].mode;
4488 if (!VECTOR_MODE_P (optab_op2_mode
))
4490 if (dump_enabled_p ())
4491 dump_printf_loc (MSG_NOTE
, vect_location
,
4492 "operand 1 using scalar mode.\n");
4494 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4495 vec_oprnds1
.quick_push (vec_oprnd1
);
4498 /* Store vec_oprnd1 for every vector stmt to be created
4499 for SLP_NODE. We check during the analysis that all
4500 the shift arguments are the same.
4501 TODO: Allow different constants for different vector
4502 stmts generated for an SLP instance. */
4503 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4504 vec_oprnds1
.quick_push (vec_oprnd1
);
4509 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4510 (a special case for certain kind of vector shifts); otherwise,
4511 operand 1 should be of a vector type (the usual case). */
4513 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4516 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4520 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4522 /* Arguments are ready. Create the new vector stmt. */
4523 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4525 vop1
= vec_oprnds1
[i
];
4526 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4527 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4528 gimple_assign_set_lhs (new_stmt
, new_temp
);
4529 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4531 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4538 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4540 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4541 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4544 vec_oprnds0
.release ();
4545 vec_oprnds1
.release ();
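/* A minimal illustrative sketch (not from the original sources) of what
   the transformation above amounts to, written with GNU C vector
   extensions; v4si, N and the array names are placeholders:

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si vcnt = { 3, 3, 3, 3 };
     v4si *va = (v4si *) a, *vb = (v4si *) b;
     for (i = 0; i < N / 4; i++)
       va[i] = vb[i] << vcnt;   // one vector shift per copy/SLP stmt

   For an invariant shift amount, op1 is either kept scalar (when the
   insn pattern takes a scalar operand 2) or broadcast into a vector as
   in vcnt above.  */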
4551 /* Function vectorizable_operation.
4553    Check if STMT performs a binary, unary or ternary operation that can
        be vectorized.
4555    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4556    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4557    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4560 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4561 gimple
*vec_stmt
, slp_tree slp_node
)
4565 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4566 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4568 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4569 enum tree_code code
;
4570 machine_mode vec_mode
;
4577 enum vect_def_type dt
[3]
4578 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4579 gimple new_stmt
= NULL
;
4580 stmt_vec_info prev_stmt_info
;
4586 vec
<tree
> vec_oprnds0
= vNULL
;
4587 vec
<tree
> vec_oprnds1
= vNULL
;
4588 vec
<tree
> vec_oprnds2
= vNULL
;
4589 tree vop0
, vop1
, vop2
;
4590 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4593 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4596 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4599 /* Is STMT a vectorizable binary/unary operation? */
4600 if (!is_gimple_assign (stmt
))
4603 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4606 code
= gimple_assign_rhs_code (stmt
);
4608 /* For pointer addition, we should use the normal plus for
4609 the vector addition. */
4610 if (code
== POINTER_PLUS_EXPR
)
4613 /* Support only unary or binary operations. */
4614 op_type
= TREE_CODE_LENGTH (code
);
4615 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4617 if (dump_enabled_p ())
4618 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4619 "num. args = %d (not unary/binary/ternary op).\n",
4624 scalar_dest
= gimple_assign_lhs (stmt
);
4625 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4627 /* Most operations cannot handle bit-precision types without extra
4629 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4630 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4631 /* Exception are bitwise binary operations. */
4632 && code
!= BIT_IOR_EXPR
4633 && code
!= BIT_XOR_EXPR
4634 && code
!= BIT_AND_EXPR
)
4636 if (dump_enabled_p ())
4637 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4638 "bit-precision arithmetic not supported.\n");
4642 op0
= gimple_assign_rhs1 (stmt
);
4643 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4644 &def_stmt
, &def
, &dt
[0], &vectype
))
4646 if (dump_enabled_p ())
4647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4648 "use not simple.\n");
4651 /* If op0 is an external or constant def use a vector type with
4652 the same size as the output vector type. */
4654 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4656 gcc_assert (vectype
);
4659 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4662 "no vectype for scalar type ");
4663 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4665 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4671 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4672 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4673 if (nunits_out
!= nunits_in
)
4676 if (op_type
== binary_op
|| op_type
== ternary_op
)
4678 op1
= gimple_assign_rhs2 (stmt
);
4679 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4682 if (dump_enabled_p ())
4683 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4684 "use not simple.\n");
4688 if (op_type
== ternary_op
)
4690 op2
= gimple_assign_rhs3 (stmt
);
4691 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4694 if (dump_enabled_p ())
4695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4696 "use not simple.\n");
4702 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4706   /* Multiple types in SLP are handled by creating the appropriate number of
4707      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
         case of SLP.  */
4709 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4712 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4714 gcc_assert (ncopies
>= 1);
4716 /* Shifts are handled in vectorizable_shift (). */
4717 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4718 || code
== RROTATE_EXPR
)
4721 /* Supportable by target? */
4723 vec_mode
= TYPE_MODE (vectype
);
4724 if (code
== MULT_HIGHPART_EXPR
)
4726 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4727 icode
= LAST_INSN_CODE
;
4729 icode
= CODE_FOR_nothing
;
4733 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4741 icode
= (int) optab_handler (optab
, vec_mode
);
4744 if (icode
== CODE_FOR_nothing
)
4746 if (dump_enabled_p ())
4747 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4748 "op not supported by target.\n");
4749 /* Check only during analysis. */
4750 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4751 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4753 if (dump_enabled_p ())
4754 dump_printf_loc (MSG_NOTE
, vect_location
,
4755 "proceeding using word mode.\n");
4758 /* Worthwhile without SIMD support? Check only during analysis. */
4759 if (!VECTOR_MODE_P (vec_mode
)
4761 && vf
< vect_min_worthwhile_factor (code
))
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4765 "not worthwhile without SIMD support.\n");
4769 if (!vec_stmt
) /* transformation not required. */
4771 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4772 if (dump_enabled_p ())
4773 dump_printf_loc (MSG_NOTE
, vect_location
,
4774 "=== vectorizable_operation ===\n");
4775 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_NOTE
, vect_location
,
4783 "transform binary/unary operation.\n");
4786 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4788 /* In case the vectorization factor (VF) is bigger than the number
4789 of elements that we can fit in a vectype (nunits), we have to generate
4790 more than one vector stmt - i.e - we need to "unroll" the
4791 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4792 from one copy of the vector stmt to the next, in the field
4793 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4794 stages to find the correct vector defs to be used when vectorizing
4795 stmts that use the defs of the current stmt. The example below
4796 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4797 we need to create 4 vectorized stmts):
4799 before vectorization:
4800 RELATED_STMT VEC_STMT
4804 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4806 RELATED_STMT VEC_STMT
4807 VS1_0: vx0 = memref0 VS1_1 -
4808 VS1_1: vx1 = memref1 VS1_2 -
4809 VS1_2: vx2 = memref2 VS1_3 -
4810 VS1_3: vx3 = memref3 - -
4811 S1: x = load - VS1_0
4814 step2: vectorize stmt S2 (done here):
4815 To vectorize stmt S2 we first need to find the relevant vector
4816 def for the first operand 'x'. This is, as usual, obtained from
4817 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4818 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4819 relevant vector def 'vx0'. Having found 'vx0' we can generate
4820 the vector stmt VS2_0, and as usual, record it in the
4821 STMT_VINFO_VEC_STMT of stmt S2.
4822 When creating the second copy (VS2_1), we obtain the relevant vector
4823 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4824 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4825 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4826 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4827 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4828 chain of stmts and pointers:
4829 RELATED_STMT VEC_STMT
4830 VS1_0: vx0 = memref0 VS1_1 -
4831 VS1_1: vx1 = memref1 VS1_2 -
4832 VS1_2: vx2 = memref2 VS1_3 -
4833 VS1_3: vx3 = memref3 - -
4834 S1: x = load - VS1_0
4835 VS2_0: vz0 = vx0 + v1 VS2_1 -
4836 VS2_1: vz1 = vx1 + v1 VS2_2 -
4837 VS2_2: vz2 = vx2 + v1 VS2_3 -
4838 VS2_3: vz3 = vx3 + v1 - -
4839 S2: z = x + 1 - VS2_0 */
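/* A minimal illustrative sketch (not from the original sources) of the
   unrolling described above for VF=16 and nunits=4, using GNU C vector
   extension notation; v4si and the names are placeholders:

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si vone = { 1, 1, 1, 1 };

     vz0 = vx0 + vone;   // copy 0, recorded in STMT_VINFO_VEC_STMT of S2
     vz1 = vx1 + vone;   // copy 1, reached via STMT_VINFO_RELATED_STMT
     vz2 = vx2 + vone;   // copy 2
     vz3 = vx3 + vone;   // copy 3  */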
4841 prev_stmt_info
= NULL
;
4842 for (j
= 0; j
< ncopies
; j
++)
4847 if (op_type
== binary_op
|| op_type
== ternary_op
)
4848 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4851 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4853 if (op_type
== ternary_op
)
4855 vec_oprnds2
.create (1);
4856 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4863 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4864 if (op_type
== ternary_op
)
4866 tree vec_oprnd
= vec_oprnds2
.pop ();
4867 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4872 /* Arguments are ready. Create the new vector stmt. */
4873 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4875 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4876 ? vec_oprnds1
[i
] : NULL_TREE
);
4877 vop2
= ((op_type
== ternary_op
)
4878 ? vec_oprnds2
[i
] : NULL_TREE
);
4879 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
4881 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4882 gimple_assign_set_lhs (new_stmt
, new_temp
);
4883 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4885 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4892 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4894 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4895 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4898 vec_oprnds0
.release ();
4899 vec_oprnds1
.release ();
4900 vec_oprnds2
.release ();
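/* A minimal illustrative sketch (not from the original sources; N and
   the array names are placeholders): a ternary operation that takes the
   op_type == ternary_op path above is, for instance, a fused
   multiply-add recognized as a three-operand gimple rhs:

     for (i = 0; i < N; i++)
       d[i] = a[i] * b[i] + c[i];   // rhs1, rhs2, rhs3 -> vop0, vop1, vop2

   Each copy then pulls one vector def per operand from vec_oprnds0/1/2
   before building the vector assignment.  */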
4905 /* A helper function to ensure data reference DR's base alignment
     for vectorization.  */
4909 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4914   if (((dataref_aux *) dr->aux)->base_misaligned)
4916       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4917       tree base_decl = ((dataref_aux *) dr->aux)->base_decl;
4919       DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4920       DECL_USER_ALIGN (base_decl) = 1;
4921       ((dataref_aux *) dr->aux)->base_misaligned = false;
4926 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4927    reversal of the vector elements.  If that is impossible to do,
        returns NULL.  */
4931 perm_mask_for_reverse (tree vectype)
4936   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4937   sel = XALLOCAVEC (unsigned char, nunits);
4939   for (i = 0; i < nunits; ++i)
4940     sel[i] = nunits - 1 - i;
4942   return vect_gen_perm_mask (vectype, sel);
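/* A minimal illustrative sketch (not from the original sources): at the
   source level the reversal mask built above corresponds to the GNU C

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si reverse (v4si x)
     {
       v4si mask = { 3, 2, 1, 0 };        // nunits - 1 - i for each i
       return __builtin_shuffle (x, mask);
     }

   and it is unavailable (NULL) when the target cannot perform such a
   permutation (can_vec_perm_p fails).  */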
4945 /* Function vectorizable_store.
4947    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
        can be vectorized.
4949    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4950    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4951    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4954 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4960 tree vec_oprnd
= NULL_TREE
;
4961 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4962 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4963 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4965 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4966 struct loop
*loop
= NULL
;
4967 machine_mode vec_mode
;
4969 enum dr_alignment_support alignment_support_scheme
;
4972 enum vect_def_type dt
;
4973 stmt_vec_info prev_stmt_info
= NULL
;
4974 tree dataref_ptr
= NULL_TREE
;
4975 tree dataref_offset
= NULL_TREE
;
4976 gimple ptr_incr
= NULL
;
4977 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4980 gimple next_stmt
, first_stmt
= NULL
;
4981 bool grouped_store
= false;
4982 bool store_lanes_p
= false;
4983 unsigned int group_size
, i
;
4984 vec
<tree
> dr_chain
= vNULL
;
4985 vec
<tree
> oprnds
= vNULL
;
4986 vec
<tree
> result_chain
= vNULL
;
4988 bool negative
= false;
4989 tree offset
= NULL_TREE
;
4990 vec
<tree
> vec_oprnds
= vNULL
;
4991 bool slp
= (slp_node
!= NULL
);
4992 unsigned int vec_num
;
4993 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4997 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4999   /* Multiple types in SLP are handled by creating the appropriate number of
5000      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
         case of SLP.  */
5002 if (slp
|| PURE_SLP_STMT (stmt_info
))
5005 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5007 gcc_assert (ncopies
>= 1);
5009 /* FORNOW. This restriction should be relaxed. */
5010 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5012 if (dump_enabled_p ())
5013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5014 "multiple types in nested loop.\n");
5018 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5021 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5024 /* Is vectorizable store? */
5026 if (!is_gimple_assign (stmt
))
5029 scalar_dest
= gimple_assign_lhs (stmt
);
5030 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5031 && is_pattern_stmt_p (stmt_info
))
5032 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5033 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5034 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5035 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5036 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5037 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5038 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5039 && TREE_CODE (scalar_dest
) != MEM_REF
)
5042 gcc_assert (gimple_assign_single_p (stmt
));
5043 op
= gimple_assign_rhs1 (stmt
);
5044 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5049 "use not simple.\n");
5053 elem_type
= TREE_TYPE (vectype
);
5054 vec_mode
= TYPE_MODE (vectype
);
5056 /* FORNOW. In some cases can vectorize even if data-type not supported
5057 (e.g. - array initialization with 0). */
5058 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5061 if (!STMT_VINFO_DATA_REF (stmt_info
))
5065 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5066 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5067 size_zero_node
) < 0;
5068 if (negative
&& ncopies
> 1)
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5072 "multiple types with negative step.\n");
5078 gcc_assert (!grouped_store
);
5079 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5080 if (alignment_support_scheme
!= dr_aligned
5081 && alignment_support_scheme
!= dr_unaligned_supported
)
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5085 "negative step but alignment required.\n");
5088 if (dt
!= vect_constant_def
5089 && dt
!= vect_external_def
5090 && !perm_mask_for_reverse (vectype
))
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5094 "negative step and reversing not supported.\n");
5099 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5101 grouped_store
= true;
5102 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5103 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5105 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5106 if (vect_store_lanes_supported (vectype
, group_size
))
5107 store_lanes_p
= true;
5108 else if (!vect_grouped_store_supported (vectype
, group_size
))
5112 if (first_stmt
== stmt
)
5114 /* STMT is the leader of the group. Check the operands of all the
5115 stmts of the group. */
5116 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5119 gcc_assert (gimple_assign_single_p (next_stmt
));
5120 op
= gimple_assign_rhs1 (next_stmt
);
5121 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5122 &def_stmt
, &def
, &dt
))
5124 if (dump_enabled_p ())
5125 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5126 "use not simple.\n");
5129 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5134 if (!vec_stmt
) /* transformation not required. */
5136 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5137 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5144 ensure_base_align (stmt_info
, dr
);
5148 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5149 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5151 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5154 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5156 /* We vectorize all the stmts of the interleaving group when we
5157 reach the last stmt in the group. */
5158 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5159 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5168 grouped_store
= false;
5169 /* VEC_NUM is the number of vect stmts to be created for this
5171 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5172 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5173 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5174 op
= gimple_assign_rhs1 (first_stmt
);
5177 /* VEC_NUM is the number of vect stmts to be created for this
5179 vec_num
= group_size
;
5185 group_size
= vec_num
= 1;
5188 if (dump_enabled_p ())
5189 dump_printf_loc (MSG_NOTE
, vect_location
,
5190 "transform store. ncopies = %d\n", ncopies
);
5192 dr_chain
.create (group_size
);
5193 oprnds
.create (group_size
);
5195 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5196 gcc_assert (alignment_support_scheme
);
5197   /* Targets with store-lane instructions must not require explicit
        realignment.  */
5199   gcc_assert (!store_lanes_p
5200               || alignment_support_scheme == dr_aligned
5201               || alignment_support_scheme == dr_unaligned_supported);
5204 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5207 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5209 aggr_type
= vectype
;
5211 /* In case the vectorization factor (VF) is bigger than the number
5212 of elements that we can fit in a vectype (nunits), we have to generate
5213 more than one vector stmt - i.e - we need to "unroll" the
5214 vector stmt by a factor VF/nunits. For more details see documentation in
5215 vect_get_vec_def_for_copy_stmt. */
5217 /* In case of interleaving (non-unit grouped access):
5224 We create vectorized stores starting from base address (the access of the
5225 first stmt in the chain (S2 in the above example), when the last store stmt
5226 of the chain (S4) is reached:
5229 VS2: &base + vec_size*1 = vx0
5230 VS3: &base + vec_size*2 = vx1
5231 VS4: &base + vec_size*3 = vx3
5233 Then permutation statements are generated:
5235 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5236 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5239 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5240 (the order of the data-refs in the output of vect_permute_store_chain
5241 corresponds to the order of scalar stmts in the interleaving chain - see
5242 the documentation of vect_permute_store_chain()).
5244 In case of both multiple types and interleaving, above vector stores and
5245 permutation stmts are created for every copy. The result vector stmts are
5246 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5247 STMT_VINFO_RELATED_STMT for the next copies.
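/* A minimal illustrative sketch (not from the original sources; N and
   the array names are placeholders): a source loop that produces the
   interleaved store group described above, for a group size of two:

     for (i = 0; i < N; i++)
       {
         out[2 * i]     = a[i];   // first element of the group
         out[2 * i + 1] = b[i];   // second element of the group
       }

   The vectorized defs of a[] and b[] are collected in DR_CHAIN and
   interleaved by vect_permute_store_chain using VEC_PERM_EXPRs before
   the wide contiguous stores to out[] are emitted.  */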
5250 prev_stmt_info
= NULL
;
5251 for (j
= 0; j
< ncopies
; j
++)
5259 /* Get vectorized arguments for SLP_NODE. */
5260 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5261 NULL
, slp_node
, -1);
5263 vec_oprnd
= vec_oprnds
[0];
5267 /* For interleaved stores we collect vectorized defs for all the
5268 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5269 used as an input to vect_permute_store_chain(), and OPRNDS as
5270 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5272 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5273 OPRNDS are of size 1. */
5274 next_stmt
= first_stmt
;
5275 for (i
= 0; i
< group_size
; i
++)
5277 /* Since gaps are not supported for interleaved stores,
5278 GROUP_SIZE is the exact number of stmts in the chain.
5279 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5280 there is no interleaving, GROUP_SIZE is 1, and only one
5281 iteration of the loop will be executed. */
5282 gcc_assert (next_stmt
5283 && gimple_assign_single_p (next_stmt
));
5284 op
= gimple_assign_rhs1 (next_stmt
);
5286 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5288 dr_chain
.quick_push (vec_oprnd
);
5289 oprnds
.quick_push (vec_oprnd
);
5290 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5294           /* We should have caught mismatched types earlier.  */
5295 gcc_assert (useless_type_conversion_p (vectype
,
5296 TREE_TYPE (vec_oprnd
)));
5297 bool simd_lane_access_p
5298 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5299 if (simd_lane_access_p
5300 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5301 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5302 && integer_zerop (DR_OFFSET (first_dr
))
5303 && integer_zerop (DR_INIT (first_dr
))
5304 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5305 get_alias_set (DR_REF (first_dr
))))
5307 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5308 dataref_offset
= build_int_cst (reference_alias_ptr_type
5309 (DR_REF (first_dr
)), 0);
5314 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5315 simd_lane_access_p
? loop
: NULL
,
5316 offset
, &dummy
, gsi
, &ptr_incr
,
5317 simd_lane_access_p
, &inv_p
);
5318 gcc_assert (bb_vinfo
|| !inv_p
);
5322 /* For interleaved stores we created vectorized defs for all the
5323 defs stored in OPRNDS in the previous iteration (previous copy).
5324 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5325 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5327 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5328 OPRNDS are of size 1. */
5329 for (i
= 0; i
< group_size
; i
++)
5332 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5334 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5335 dr_chain
[i
] = vec_oprnd
;
5336 oprnds
[i
] = vec_oprnd
;
5340 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5341 TYPE_SIZE_UNIT (aggr_type
));
5343 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5344 TYPE_SIZE_UNIT (aggr_type
));
5351 /* Combine all the vectors into an array. */
5352 vec_array
= create_vector_array (vectype
, vec_num
);
5353 for (i
= 0; i
< vec_num
; i
++)
5355 vec_oprnd
= dr_chain
[i
];
5356 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5360 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5361 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5362 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5363 gimple_call_set_lhs (new_stmt
, data_ref
);
5364 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5372 result_chain
.create (group_size
);
5374 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5378 next_stmt
= first_stmt
;
5379 for (i
= 0; i
< vec_num
; i
++)
5381 unsigned align
, misalign
;
5384 /* Bump the vector pointer. */
5385 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5389 vec_oprnd
= vec_oprnds
[i
];
5390 else if (grouped_store
)
5391 /* For grouped stores vectorized defs are interleaved in
5392 vect_permute_store_chain(). */
5393 vec_oprnd
= result_chain
[i
];
5395 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5398 : build_int_cst (reference_alias_ptr_type
5399 (DR_REF (first_dr
)), 0));
5400 align
= TYPE_ALIGN_UNIT (vectype
);
5401 if (aligned_access_p (first_dr
))
5403 else if (DR_MISALIGNMENT (first_dr
) == -1)
5405 TREE_TYPE (data_ref
)
5406 = build_aligned_type (TREE_TYPE (data_ref
),
5407 TYPE_ALIGN (elem_type
));
5408 align
= TYPE_ALIGN_UNIT (elem_type
);
5413 TREE_TYPE (data_ref
)
5414 = build_aligned_type (TREE_TYPE (data_ref
),
5415 TYPE_ALIGN (elem_type
));
5416 misalign
= DR_MISALIGNMENT (first_dr
);
5418 if (dataref_offset
== NULL_TREE
)
5419 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5423 && dt
!= vect_constant_def
5424 && dt
!= vect_external_def
)
5426 tree perm_mask
= perm_mask_for_reverse (vectype
);
5428 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5430 tree new_temp
= make_ssa_name (perm_dest
, NULL
);
5432 /* Generate the permute statement. */
5434 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, new_temp
,
5435 vec_oprnd
, vec_oprnd
,
5437 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5439 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5440 vec_oprnd
= new_temp
;
5443 /* Arguments are ready. Create the new vector stmt. */
5444 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5445 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5450 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5458 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5460 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5461 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5465 dr_chain
.release ();
5467 result_chain
.release ();
5468 vec_oprnds
.release ();
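/* A minimal illustrative sketch (not from the original sources; N and
   the array names are placeholders): a store with a negative step,
   which is handled above by reversing each vector with
   perm_mask_for_reverse before storing:

     for (i = 0; i < N; i++)
       out[N - 1 - i] = a[i];     // DR_STEP of out[] is negative

   Only ncopies == 1 is supported for this shape, and the reversal is
   skipped when the stored value is a constant or external def.  */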
5473 /* Given a vector type VECTYPE and permutation SEL returns
5474    the VECTOR_CST mask that implements the permutation of the
5475    vector elements.  If that is impossible to do, returns NULL.  */
5478 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5480   tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5483   nunits = TYPE_VECTOR_SUBPARTS (vectype);
5485   if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5488   mask_elt_type = lang_hooks.types.type_for_mode
5489     (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5490   mask_type = get_vectype_for_scalar_type (mask_elt_type);
5492   mask_elts = XALLOCAVEC (tree, nunits);
5493   for (i = nunits - 1; i >= 0; i--)
5494     mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5495   mask_vec = build_vector (mask_type, mask_elts);
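/* A minimal illustrative sketch (not from the original sources): the
   mask built above is simply a constant vector of element indices, as
   in this GNU C equivalent of a two-input permutation:

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si take_even (v4si x, v4si y)
     {
       v4si mask = { 0, 2, 4, 6 };              // select even elements
       return __builtin_shuffle (x, y, mask);
     }

   When two input vectors are permuted, indices in SEL are interpreted
   modulo twice the number of elements.  */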
5500 /* Given vector variables X and Y that were generated for the scalar
5501    STMT, generate instructions to permute the vector elements of X and Y
5502    using the permutation mask MASK_VEC, insert them at *GSI and return the
5503    permuted vector variable.  */
5506 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5507                       gimple_stmt_iterator *gsi)
5509   tree vectype = TREE_TYPE (x);
5510   tree perm_dest, data_ref;
5513   perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5514   data_ref = make_ssa_name (perm_dest, NULL);
5516   /* Generate the permute statement.  */
5517   perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5519   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5524 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5525    inserting them on the loop's preheader edge.  Returns true if we
5526    were successful in doing so (and thus STMT can then be moved),
5527    otherwise returns false.  */
5530 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5536   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5538       gimple def_stmt = SSA_NAME_DEF_STMT (op);
5539       if (!gimple_nop_p (def_stmt)
5540           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5542           /* Make sure we don't need to recurse.  While we could do
5543              so in simple cases, when there are more complex use webs
5544              we don't have an easy way to preserve stmt order to fulfil
5545              dependencies within them.  */
5548           if (gimple_code (def_stmt) == GIMPLE_PHI)
5550           FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5552               gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5553               if (!gimple_nop_p (def_stmt2)
5554                   && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5564   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5566       gimple def_stmt = SSA_NAME_DEF_STMT (op);
5567       if (!gimple_nop_p (def_stmt)
5568           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5570           gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5571           gsi_remove (&gsi, false);
5572           gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
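/* A minimal illustrative sketch (not from the original sources; names
   are placeholders): a case where the helper above applies.  The load
   of *p is invariant, but the address computation feeding it is defined
   inside the loop:

     for (i = 0; i < N; i++)
       {
         int *p = base + off;     // def inside the loop, but invariant
         a[i] = *p + a[i];
       }

   Moving the definition of p to the loop preheader lets the invariant
   load itself be hoisted and treated as an external def.  */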
5579 /* vectorizable_load.
5581    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
        can be vectorized.
5583    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5584    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5585    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5588 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5589 slp_tree slp_node
, slp_instance slp_node_instance
)
5592 tree vec_dest
= NULL
;
5593 tree data_ref
= NULL
;
5594 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5595 stmt_vec_info prev_stmt_info
;
5596 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5597 struct loop
*loop
= NULL
;
5598 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5599 bool nested_in_vect_loop
= false;
5600 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5601 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5605 gimple new_stmt
= NULL
;
5607 enum dr_alignment_support alignment_support_scheme
;
5608 tree dataref_ptr
= NULL_TREE
;
5609 tree dataref_offset
= NULL_TREE
;
5610 gimple ptr_incr
= NULL
;
5611 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5613 int i
, j
, group_size
, group_gap
;
5614 tree msq
= NULL_TREE
, lsq
;
5615 tree offset
= NULL_TREE
;
5616 tree byte_offset
= NULL_TREE
;
5617 tree realignment_token
= NULL_TREE
;
5619 vec
<tree
> dr_chain
= vNULL
;
5620 bool grouped_load
= false;
5621 bool load_lanes_p
= false;
5624 bool negative
= false;
5625 bool compute_in_loop
= false;
5626 struct loop
*at_loop
;
5628 bool slp
= (slp_node
!= NULL
);
5629 bool slp_perm
= false;
5630 enum tree_code code
;
5631 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5634 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5635 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5636 int gather_scale
= 1;
5637 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5641 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5642 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5643 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5648   /* Multiple types in SLP are handled by creating the appropriate number of
5649      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
         case of SLP.  */
5651 if (slp
|| PURE_SLP_STMT (stmt_info
))
5654 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5656 gcc_assert (ncopies
>= 1);
5658 /* FORNOW. This restriction should be relaxed. */
5659 if (nested_in_vect_loop
&& ncopies
> 1)
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5663 "multiple types in nested loop.\n");
5667 /* Invalidate assumptions made by dependence analysis when vectorization
5668 on the unrolled body effectively re-orders stmts. */
5670 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5671 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5672 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5676 "cannot perform implicit CSE when unrolling "
5677 "with negative dependence distance\n");
5681 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5684 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5687 /* Is vectorizable load? */
5688 if (!is_gimple_assign (stmt
))
5691 scalar_dest
= gimple_assign_lhs (stmt
);
5692 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5695 code
= gimple_assign_rhs_code (stmt
);
5696 if (code
!= ARRAY_REF
5697 && code
!= BIT_FIELD_REF
5698 && code
!= INDIRECT_REF
5699 && code
!= COMPONENT_REF
5700 && code
!= IMAGPART_EXPR
5701 && code
!= REALPART_EXPR
5703 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5706 if (!STMT_VINFO_DATA_REF (stmt_info
))
5709 elem_type
= TREE_TYPE (vectype
);
5710 mode
= TYPE_MODE (vectype
);
5712 /* FORNOW. In some cases can vectorize even if data-type not supported
5713 (e.g. - data copies). */
5714 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5718 "Aligned load, but unsupported type.\n");
5722 /* Check if the load is a part of an interleaving chain. */
5723 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5725 grouped_load
= true;
5727 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5729 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5730 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5732 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5733 if (vect_load_lanes_supported (vectype
, group_size
))
5734 load_lanes_p
= true;
5735 else if (!vect_grouped_load_supported (vectype
, group_size
))
5739 /* Invalidate assumptions made by dependence analysis when vectorization
5740 on the unrolled body effectively re-orders stmts. */
5741 if (!PURE_SLP_STMT (stmt_info
)
5742 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5743 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5744 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5748 "cannot perform implicit CSE when performing "
5749 "group loads with negative dependence distance\n");
5755 if (STMT_VINFO_GATHER_P (stmt_info
))
5759 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5760 &gather_off
, &gather_scale
);
5761 gcc_assert (gather_decl
);
5762 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5763 &def_stmt
, &def
, &gather_dt
,
5764 &gather_off_vectype
))
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5768 "gather index use not simple.\n");
5772 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5776 negative
= tree_int_cst_compare (nested_in_vect_loop
5777 ? STMT_VINFO_DR_STEP (stmt_info
)
5779 size_zero_node
) < 0;
5780 if (negative
&& ncopies
> 1)
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5784 "multiple types with negative step.\n");
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5794 "negative step for group load not supported"
5798 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5799 if (alignment_support_scheme
!= dr_aligned
5800 && alignment_support_scheme
!= dr_unaligned_supported
)
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5804 "negative step but alignment required.\n");
5807 if (!perm_mask_for_reverse (vectype
))
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5811 "negative step and reversing not supported."
5818 if (!vec_stmt
) /* transformation not required. */
5820 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
5821 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
5825 if (dump_enabled_p ())
5826 dump_printf_loc (MSG_NOTE
, vect_location
,
5827 "transform load. ncopies = %d\n", ncopies
);
5831 ensure_base_align (stmt_info
, dr
);
5833 if (STMT_VINFO_GATHER_P (stmt_info
))
5835 tree vec_oprnd0
= NULL_TREE
, op
;
5836 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
5837 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5838 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
5839 edge pe
= loop_preheader_edge (loop
);
5842 enum { NARROW
, NONE
, WIDEN
} modifier
;
5843 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
5845 if (nunits
== gather_off_nunits
)
5847 else if (nunits
== gather_off_nunits
/ 2)
5849 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
5852 for (i
= 0; i
< gather_off_nunits
; ++i
)
5853 sel
[i
] = i
| nunits
;
5855 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
5856 gcc_assert (perm_mask
!= NULL_TREE
);
5858 else if (nunits
== gather_off_nunits
* 2)
5860 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5863 for (i
= 0; i
< nunits
; ++i
)
5864 sel
[i
] = i
< gather_off_nunits
5865 ? i
: i
+ nunits
- gather_off_nunits
;
5867 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
5868 gcc_assert (perm_mask
!= NULL_TREE
);
5874 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
5875 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5876 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5877 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5878 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5879 scaletype
= TREE_VALUE (arglist
);
5880 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
5882 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5884 ptr
= fold_convert (ptrtype
, gather_base
);
5885 if (!is_gimple_min_invariant (ptr
))
5887 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5888 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5889 gcc_assert (!new_bb
);
5892 /* Currently we support only unconditional gather loads,
5893 so mask should be all ones. */
5894 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
5895 mask
= build_int_cst (masktype
, -1);
5896 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
5898 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
5899 mask
= build_vector_from_val (masktype
, mask
);
5900 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5902 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
5906 for (j
= 0; j
< 6; ++j
)
5908 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
5909 mask
= build_real (TREE_TYPE (masktype
), r
);
5910 mask
= build_vector_from_val (masktype
, mask
);
5911 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5916 scale
= build_int_cst (scaletype
, gather_scale
);
5918 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
5919 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
5920 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
5924 for (j
= 0; j
< 6; ++j
)
5926 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
5927 merge
= build_real (TREE_TYPE (rettype
), r
);
5931 merge
= build_vector_from_val (rettype
, merge
);
5932 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
5934 prev_stmt_info
= NULL
;
5935 for (j
= 0; j
< ncopies
; ++j
)
5937 if (modifier
== WIDEN
&& (j
& 1))
5938 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
5939 perm_mask
, stmt
, gsi
);
5942 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
5945 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
5947 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5949 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5950 == TYPE_VECTOR_SUBPARTS (idxtype
));
5951 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5952 var
= make_ssa_name (var
, NULL
);
5953 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5955 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
5957 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5962 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
5964 if (!useless_type_conversion_p (vectype
, rettype
))
5966 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
5967 == TYPE_VECTOR_SUBPARTS (rettype
));
5968 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
5969 op
= make_ssa_name (var
, new_stmt
);
5970 gimple_call_set_lhs (new_stmt
, op
);
5971 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5972 var
= make_ssa_name (vec_dest
, NULL
);
5973 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
5975 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
5980 var
= make_ssa_name (vec_dest
, new_stmt
);
5981 gimple_call_set_lhs (new_stmt
, var
);
5984 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5986 if (modifier
== NARROW
)
5993 var
= permute_vec_elements (prev_res
, var
,
5994 perm_mask
, stmt
, gsi
);
5995 new_stmt
= SSA_NAME_DEF_STMT (var
);
5998 if (prev_stmt_info
== NULL
)
5999 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6001 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6002 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
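/* A minimal illustrative sketch (not from the original sources; N and
   the array names are placeholders): the source shape handled by the
   gather path above:

     for (i = 0; i < N; i++)
       out[i] = data[idx[i]];     // indexed (gather) load

   GATHER_DECL is a target builtin (e.g. an AVX2 gather on x86) taking a
   merge value, base pointer, index vector, mask and scale; one index
   vector is fed per copy, with a widening or narrowing permutation when
   the index and data vectors have different numbers of elements.  */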
6006 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
6008 gimple_stmt_iterator incr_gsi
;
6014 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6015 gimple_seq stmts
= NULL
;
6016 tree stride_base
, stride_step
, alias_off
;
6018 gcc_assert (!nested_in_vect_loop
);
6021 = fold_build_pointer_plus
6022 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6023 size_binop (PLUS_EXPR
,
6024 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6025 convert_to_ptrofftype (DR_INIT (dr
))));
6026 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6028       /* For a load with loop-invariant (but other than power-of-2)
6029          stride (i.e. not a grouped access) like so:
6031            for (i = 0; i < n; i += stride)
                 ... = array[i];
6034          we generate a new induction variable and new accesses to
6035          form a new vector (or vectors, depending on ncopies):
6037            for (j = 0; ; j += VF*stride)
                 tmp1 = array[j];
6039             tmp2 = array[j + stride];
                 ...
6041             vectemp = {tmp1, tmp2, ...}  */
6044 ivstep
= stride_step
;
6045 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6046 build_int_cst (TREE_TYPE (ivstep
), vf
));
6048 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6050 create_iv (stride_base
, ivstep
, NULL
,
6051 loop
, &incr_gsi
, insert_after
,
6053 incr
= gsi_stmt (incr_gsi
);
6054 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6056 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6058 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6060 prev_stmt_info
= NULL
;
6061 running_off
= offvar
;
6062 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6063 for (j
= 0; j
< ncopies
; j
++)
6067 vec_alloc (v
, nunits
);
6068 for (i
= 0; i
< nunits
; i
++)
6070 tree newref
, newoff
;
6072 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
6073 running_off
, alias_off
);
6075 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6078 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6079 newoff
= copy_ssa_name (running_off
, NULL
);
6080 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
6081 running_off
, stride_step
);
6082 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6084 running_off
= newoff
;
6087 vec_inv
= build_constructor (vectype
, v
);
6088 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6089 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6092 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6094 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6095 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6102 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6104 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6105 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6106 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6108       /* Check if the chain of loads is already vectorized.  */
6109       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6110           /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6111              ??? But we can only do so if there is exactly one
6112              as we have no way to get at the rest.  Leave the CSE
                 opportunity alone.
6114              ??? With the group load eventually participating
6115              in multiple different permutations (having multiple
6116              slp nodes which refer to the same group) the CSE
6117              is even wrong code.  See PR56270.  */
6120           *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6123 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6124 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6126 /* VEC_NUM is the number of vect stmts to be created for this group. */
6129 grouped_load
= false;
6130 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6131 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6133 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6137 vec_num
= group_size
;
6145 group_size
= vec_num
= 1;
6149 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6150 gcc_assert (alignment_support_scheme
);
6151   /* Targets with load-lane instructions must not require explicit
        realignment.  */
6153   gcc_assert (!load_lanes_p
6154               || alignment_support_scheme == dr_aligned
6155               || alignment_support_scheme == dr_unaligned_supported);
6157 /* In case the vectorization factor (VF) is bigger than the number
6158 of elements that we can fit in a vectype (nunits), we have to generate
6159 more than one vector stmt - i.e - we need to "unroll" the
6160 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6161 from one copy of the vector stmt to the next, in the field
6162 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6163 stages to find the correct vector defs to be used when vectorizing
6164 stmts that use the defs of the current stmt. The example below
6165 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6166 need to create 4 vectorized stmts):
6168 before vectorization:
6169 RELATED_STMT VEC_STMT
6173 step 1: vectorize stmt S1:
6174 We first create the vector stmt VS1_0, and, as usual, record a
6175 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6176 Next, we create the vector stmt VS1_1, and record a pointer to
6177 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6178 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6180 RELATED_STMT VEC_STMT
6181 VS1_0: vx0 = memref0 VS1_1 -
6182 VS1_1: vx1 = memref1 VS1_2 -
6183 VS1_2: vx2 = memref2 VS1_3 -
6184 VS1_3: vx3 = memref3 - -
6185 S1: x = load - VS1_0
6188 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6189 information we recorded in RELATED_STMT field is used to vectorize
6192 /* In case of interleaving (non-unit grouped access):
6199 Vectorized loads are created in the order of memory accesses
6200 starting from the access of the first stmt of the chain:
6203 VS2: vx1 = &base + vec_size*1
6204 VS3: vx3 = &base + vec_size*2
6205 VS4: vx4 = &base + vec_size*3
6207 Then permutation statements are generated:
6209 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6210 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6213 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6214 (the order of the data-refs in the output of vect_permute_load_chain
6215 corresponds to the order of scalar stmts in the interleaving chain - see
6216 the documentation of vect_permute_load_chain()).
6217 The generation of permutation stmts and recording them in
6218 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6220 In case of both multiple types and interleaving, the vector loads and
6221 permutation stmts above are created for every copy. The result vector
6222 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6223 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6225 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6226 on a target that supports unaligned accesses (dr_unaligned_supported)
6227 we generate the following code:
6231 p = p + indx * vectype_size;
6236 Otherwise, the data reference is potentially unaligned on a target that
6237 does not support unaligned accesses (dr_explicit_realign_optimized) -
6238 then generate the following code, in which the data in each iteration is
6239 obtained by two vector loads, one from the previous iteration, and one
6240 from the current iteration:
6242 msq_init = *(floor(p1))
6243 p2 = initial_addr + VS - 1;
6244 realignment_token = call target_builtin;
6247 p2 = p2 + indx * vectype_size
6249 vec_dest = realign_load (msq, lsq, realignment_token)
6254 /* If the misalignment remains the same throughout the execution of the
6255 loop, we can create the init_addr and permutation mask at the loop
6256 preheader. Otherwise, it needs to be created inside the loop.
6257 This can only occur when vectorizing memory accesses in the inner-loop
6258 nested within an outer-loop that is being vectorized. */
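/* A minimal illustrative sketch (not from the original sources) of the
   dr_explicit_realign(_optimized) scheme referred to above: each
   iteration loads the two aligned vectors surrounding the unaligned
   address and combines them,

     msq = *floor(p);                  // aligned vector at or below p
     lsq = *(floor(p) + VS);           // next aligned vector
     vec = realign_load (msq, lsq, realignment_token);

   In the optimized variant msq is carried over from the previous
   iteration through a PHI node, so only one new vector load is issued
   per iteration.  */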
6260 if (nested_in_vect_loop
6261 && (TREE_INT_CST_LOW (DR_STEP (dr))
6262 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6264 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6265 compute_in_loop = true;
6268 if ((alignment_support_scheme == dr_explicit_realign_optimized
6269 || alignment_support_scheme == dr_explicit_realign)
6270 && !compute_in_loop)
6272 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6273 alignment_support_scheme, NULL_TREE,
6275 if (alignment_support_scheme == dr_explicit_realign_optimized)
6277 phi = SSA_NAME_DEF_STMT (msq);
6278 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6286 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6289 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6291 aggr_type = vectype;
6293 prev_stmt_info = NULL;
6294 for (j = 0; j < ncopies; j++)
6296 /* 1. Create the vector or array pointer update chain. */
6299 bool simd_lane_access_p
6300 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6301 if (simd_lane_access_p
6302 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6303 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6304 && integer_zerop (DR_OFFSET (first_dr))
6305 && integer_zerop (DR_INIT (first_dr))
6306 && alias_sets_conflict_p (get_alias_set (aggr_type),
6307 get_alias_set (DR_REF (first_dr)))
6308 && (alignment_support_scheme == dr_aligned
6309 || alignment_support_scheme == dr_unaligned_supported))
6311 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6312 dataref_offset = build_int_cst (reference_alias_ptr_type
6313 (DR_REF (first_dr)), 0);
6318 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6319 offset, &dummy, gsi, &ptr_incr,
6320 simd_lane_access_p, &inv_p,
6323 else if (dataref_offset)
6324 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6325 TYPE_SIZE_UNIT (aggr_type));
6327 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6328 TYPE_SIZE_UNIT (aggr_type));
6330 if (grouped_load || slp_perm)
6331 dr_chain.create (vec_num);
6337 vec_array = create_vector_array (vectype, vec_num);
6340 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6341 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6342 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6343 gimple_call_set_lhs (new_stmt, vec_array);
6344 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6346 /* Extract each vector into an SSA_NAME. */
6347 for (i = 0; i < vec_num; i++)
6349 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6351 dr_chain.quick_push (new_temp);
6354 /* Record the mapping between SSA_NAMEs and statements. */
6355 vect_record_grouped_load_vectors (stmt, dr_chain);
6359 for (i
= 0; i
< vec_num
; i
++)
6362 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6365 /* 2. Create the vector-load in the loop. */
6366 switch (alignment_support_scheme
)
6369 case dr_unaligned_supported
:
6371 unsigned int align
, misalign
;
6374 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6377 : build_int_cst (reference_alias_ptr_type
6378 (DR_REF (first_dr
)), 0));
6379 align
= TYPE_ALIGN_UNIT (vectype
);
6380 if (alignment_support_scheme
== dr_aligned
)
6382 gcc_assert (aligned_access_p (first_dr
));
6385 else if (DR_MISALIGNMENT (first_dr
) == -1)
6387 TREE_TYPE (data_ref
)
6388 = build_aligned_type (TREE_TYPE (data_ref
),
6389 TYPE_ALIGN (elem_type
));
6390 align
= TYPE_ALIGN_UNIT (elem_type
);
6395 TREE_TYPE (data_ref
)
6396 = build_aligned_type (TREE_TYPE (data_ref
),
6397 TYPE_ALIGN (elem_type
));
6398 misalign
= DR_MISALIGNMENT (first_dr
);
6400 if (dataref_offset
== NULL_TREE
)
6401 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6405 case dr_explicit_realign
:
6410 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6412 if (compute_in_loop
)
6413 msq
= vect_setup_realignment (first_stmt
, gsi
,
6415 dr_explicit_realign
,
6418 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
6419 new_stmt
= gimple_build_assign_with_ops
6420 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
6422 (TREE_TYPE (dataref_ptr
),
6423 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6424 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6426 = build2 (MEM_REF
, vectype
, ptr
,
6427 build_int_cst (reference_alias_ptr_type
6428 (DR_REF (first_dr
)), 0));
6429 vec_dest
= vect_create_destination_var (scalar_dest
,
6431 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6432 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6433 gimple_assign_set_lhs (new_stmt
, new_temp
);
6434 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6435 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6436 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6439 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6440 TYPE_SIZE_UNIT (elem_type
));
6441 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6442 new_stmt
= gimple_build_assign_with_ops
6443 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
6446 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6447 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6448 gimple_assign_set_lhs (new_stmt
, ptr
);
6449 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6451 = build2 (MEM_REF
, vectype
, ptr
,
6452 build_int_cst (reference_alias_ptr_type
6453 (DR_REF (first_dr
)), 0));
6456 case dr_explicit_realign_optimized
:
6457 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
6458 new_stmt
= gimple_build_assign_with_ops
6459 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
6461 (TREE_TYPE (dataref_ptr
),
6462 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6463 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6465 = build2 (MEM_REF
, vectype
, new_temp
,
6466 build_int_cst (reference_alias_ptr_type
6467 (DR_REF (first_dr
)), 0));
6472 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6473 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6474 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6475 gimple_assign_set_lhs (new_stmt
, new_temp
);
6476 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6478 /* 3. Handle explicit realignment if necessary/supported.
6480 vec_dest = realign_load (msq, lsq, realignment_token) */
6481 if (alignment_support_scheme
== dr_explicit_realign_optimized
6482 || alignment_support_scheme
== dr_explicit_realign
)
6484 lsq
= gimple_assign_lhs (new_stmt
);
6485 if (!realignment_token
)
6486 realignment_token
= dataref_ptr
;
6487 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6489 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
6492 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6493 gimple_assign_set_lhs (new_stmt
, new_temp
);
6494 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6496 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6499 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6500 add_phi_arg (phi
, lsq
,
6501 loop_latch_edge (containing_loop
),
6507 /* 4. Handle invariant-load. */
6508 if (inv_p
&& !bb_vinfo
)
6510 gcc_assert (!grouped_load
);
6511 /* If we have versioned for aliasing or the loop doesn't
6512 have any data dependencies that would preclude this,
6513 then we are sure this is a loop invariant load and
6514 thus we can insert it on the preheader edge. */
6515 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6516 && !nested_in_vect_loop
6517 && hoist_defs_of_uses (stmt
, loop
))
6519 if (dump_enabled_p ())
6521 dump_printf_loc (MSG_NOTE
, vect_location
,
6522 "hoisting out of the vectorized "
6524 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6525 dump_printf (MSG_NOTE
, "\n");
6527 tree tem
= copy_ssa_name (scalar_dest
, NULL
);
6528 gsi_insert_on_edge_immediate
6529 (loop_preheader_edge (loop
),
6530 gimple_build_assign (tem
,
6532 (gimple_assign_rhs1 (stmt
))));
6533 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6537 gimple_stmt_iterator gsi2
= *gsi
;
6539 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6542 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6543 set_vinfo_for_stmt (new_stmt
,
6544 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6550 tree perm_mask
= perm_mask_for_reverse (vectype
);
6551 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6552 perm_mask
, stmt
, gsi
);
6553 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6556 /* Collect vector loads and later create their permutation in
6557 vect_transform_grouped_load (). */
6558 if (grouped_load
|| slp_perm
)
6559 dr_chain
.quick_push (new_temp
);
6561 /* Store vector loads in the corresponding SLP_NODE. */
6562 if (slp
&& !slp_perm
)
6563 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6565 /* Bump the vector pointer to account for a gap. */
6566 if (slp
&& group_gap
!= 0)
6568 tree bump
= size_binop (MULT_EXPR
,
6569 TYPE_SIZE_UNIT (elem_type
),
6570 size_int (group_gap
));
6571 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6576 if (slp
&& !slp_perm
)
6581 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6582 slp_node_instance
, false))
6584 dr_chain
.release ();
6593 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6594 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6599 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6601 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6602 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6605 dr_chain
.release ();
6611 /* Function vect_is_simple_cond.
6614 LOOP - the loop that is being vectorized.
6615 COND - Condition that is checked for simple use.
6618 *COMP_VECTYPE - the vector type for the comparison.
6620 Returns whether a COND can be vectorized. Checks whether
6621 condition operands are supportable using vect_is_simple_use. */
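/* Editorial example (hypothetical names, not an exact trace): for a
   scalar condition like

     a_5 > b_3

   vect_is_simple_cond checks that a_5 and b_3 are constants or SSA names
   with vectorizable definitions (via vect_is_simple_use_1) and records
   the vector type of the comparison operands in *COMP_VECTYPE.  */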
6624 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6625 bb_vec_info bb_vinfo, tree *comp_vectype)
6629 enum vect_def_type dt;
6630 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6632 if (!COMPARISON_CLASS_P (cond))
6635 lhs = TREE_OPERAND (cond, 0);
6636 rhs = TREE_OPERAND (cond, 1);
6638 if (TREE_CODE (lhs) == SSA_NAME)
6640 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6641 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6642 &lhs_def_stmt, &def, &dt, &vectype1))
6645 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6646 && TREE_CODE (lhs) != FIXED_CST)
6649 if (TREE_CODE (rhs) == SSA_NAME)
6651 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6652 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6653 &rhs_def_stmt, &def, &dt, &vectype2))
6656 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6657 && TREE_CODE (rhs) != FIXED_CST)
6660 *comp_vectype = vectype1 ? vectype1 : vectype2;
6664 /* vectorizable_condition.
6666 Check if STMT is conditional modify expression that can be vectorized.
6667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6668 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6671 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6672 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6673 else clause if it is 2).
6675 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
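/* Editorial sketch (assumed SSA names, not an exact trace): a scalar
   statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   would be replaced by something like

     vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;

   with one such VEC_COND_EXPR emitted per copy when ncopies > 1.  */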
6678 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6679 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6682 tree scalar_dest
= NULL_TREE
;
6683 tree vec_dest
= NULL_TREE
;
6684 tree cond_expr
, then_clause
, else_clause
;
6685 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6686 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6687 tree comp_vectype
= NULL_TREE
;
6688 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6689 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6690 tree vec_compare
, vec_cond_expr
;
6692 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6694 enum vect_def_type dt
, dts
[4];
6695 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6697 enum tree_code code
;
6698 stmt_vec_info prev_stmt_info
= NULL
;
6700 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6701 vec
<tree
> vec_oprnds0
= vNULL
;
6702 vec
<tree
> vec_oprnds1
= vNULL
;
6703 vec
<tree
> vec_oprnds2
= vNULL
;
6704 vec
<tree
> vec_oprnds3
= vNULL
;
6707 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6710 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6712 gcc_assert (ncopies
>= 1);
6713 if (reduc_index
&& ncopies
> 1)
6714 return false; /* FORNOW */
6716 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6719 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6722 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6723 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6727 /* FORNOW: not yet supported. */
6728 if (STMT_VINFO_LIVE_P (stmt_info
))
6730 if (dump_enabled_p ())
6731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6732 "value used after loop.\n");
6736 /* Is vectorizable conditional operation? */
6737 if (!is_gimple_assign (stmt
))
6740 code
= gimple_assign_rhs_code (stmt
);
6742 if (code
!= COND_EXPR
)
6745 cond_expr
= gimple_assign_rhs1 (stmt
);
6746 then_clause
= gimple_assign_rhs2 (stmt
);
6747 else_clause
= gimple_assign_rhs3 (stmt
);
6749 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
6754 if (TREE_CODE (then_clause
) == SSA_NAME
)
6756 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6757 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6758 &then_def_stmt
, &def
, &dt
))
6761 else if (TREE_CODE (then_clause
) != INTEGER_CST
6762 && TREE_CODE (then_clause
) != REAL_CST
6763 && TREE_CODE (then_clause
) != FIXED_CST
)
6766 if (TREE_CODE (else_clause
) == SSA_NAME
)
6768 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6769 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6770 &else_def_stmt
, &def
, &dt
))
6773 else if (TREE_CODE (else_clause
) != INTEGER_CST
6774 && TREE_CODE (else_clause
) != REAL_CST
6775 && TREE_CODE (else_clause
) != FIXED_CST
)
6778 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
6779 /* The result of a vector comparison should be a signed integer type. */
6780 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
6781 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
6782 if (vec_cmp_type
== NULL_TREE
)
6787 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6788 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
6795 vec_oprnds0
.create (1);
6796 vec_oprnds1
.create (1);
6797 vec_oprnds2
.create (1);
6798 vec_oprnds3
.create (1);
6802 scalar_dest
= gimple_assign_lhs (stmt
);
6803 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6805 /* Handle cond expr. */
6806 for (j
= 0; j
< ncopies
; j
++)
6808 gimple new_stmt
= NULL
;
6813 auto_vec
<tree
, 4> ops
;
6814 auto_vec
<vec
<tree
>, 4> vec_defs
;
6816 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
6817 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
6818 ops
.safe_push (then_clause
);
6819 ops
.safe_push (else_clause
);
6820 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
6821 vec_oprnds3
= vec_defs
.pop ();
6822 vec_oprnds2
= vec_defs
.pop ();
6823 vec_oprnds1
= vec_defs
.pop ();
6824 vec_oprnds0
= vec_defs
.pop ();
6827 vec_defs
.release ();
6833 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
6835 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6836 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6839 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
6841 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6842 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6843 if (reduc_index
== 1)
6844 vec_then_clause
= reduc_def
;
6847 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
6849 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6850 NULL, &gtemp, &def, &dts[2]);
6852 if (reduc_index
== 2)
6853 vec_else_clause
= reduc_def
;
6856 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
6858 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6859 NULL, &gtemp, &def, &dts[3]);
6865 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
6866 vec_oprnds0
.pop ());
6867 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
6868 vec_oprnds1
.pop ());
6869 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
6870 vec_oprnds2
.pop ());
6871 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
6872 vec_oprnds3
.pop ());
6877 vec_oprnds0
.quick_push (vec_cond_lhs
);
6878 vec_oprnds1
.quick_push (vec_cond_rhs
);
6879 vec_oprnds2
.quick_push (vec_then_clause
);
6880 vec_oprnds3
.quick_push (vec_else_clause
);
6883 /* Arguments are ready. Create the new vector stmt. */
6884 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
6886 vec_cond_rhs
= vec_oprnds1
[i
];
6887 vec_then_clause
= vec_oprnds2
[i
];
6888 vec_else_clause
= vec_oprnds3
[i
];
6890 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
6891 vec_cond_lhs
, vec_cond_rhs
);
6892 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6893 vec_compare
, vec_then_clause
, vec_else_clause
);
6895 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6896 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6897 gimple_assign_set_lhs (new_stmt
, new_temp
);
6898 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6900 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6907 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6909 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6911 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6914 vec_oprnds0
.release ();
6915 vec_oprnds1
.release ();
6916 vec_oprnds2
.release ();
6917 vec_oprnds3
.release ();
6923 /* Make sure the statement is vectorizable. */
6926 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
6928 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6929 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6930 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
6932 tree scalar_type
, vectype
;
6933 gimple pattern_stmt
;
6934 gimple_seq pattern_def_seq
;
6936 if (dump_enabled_p ())
6938 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
6939 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6940 dump_printf (MSG_NOTE
, "\n");
6943 if (gimple_has_volatile_ops (stmt
))
6945 if (dump_enabled_p ())
6946 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6947 "not vectorized: stmt has volatile operands\n");
6952 /* Skip stmts that do not need to be vectorized. In loops this is expected
6954 - the COND_EXPR which is the loop exit condition
6955 - any LABEL_EXPRs in the loop
6956 - computations that are used only for array indexing or loop control.
6957 In basic blocks we only analyze statements that are a part of some SLP
6958 instance, therefore, all the statements are relevant.
6960 Pattern statement needs to be analyzed instead of the original statement
6961 if the original statement is not relevant. Otherwise, we analyze both
6962 statements. In basic blocks we are called from some SLP instance
6963 traversal; in that case we do not analyze pattern stmts instead of the
6964 original ones, because the pattern stmts will already be part of some SLP instance. */
6966 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
6967 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
6968 && !STMT_VINFO_LIVE_P (stmt_info
))
6970 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6972 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6973 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6975 /* Analyze PATTERN_STMT instead of the original stmt. */
6976 stmt
= pattern_stmt
;
6977 stmt_info
= vinfo_for_stmt (pattern_stmt
);
6978 if (dump_enabled_p ())
6980 dump_printf_loc (MSG_NOTE
, vect_location
,
6981 "==> examining pattern statement: ");
6982 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6983 dump_printf (MSG_NOTE
, "\n");
6988 if (dump_enabled_p ())
6989 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
6994 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6997 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6998 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7000 /* Analyze PATTERN_STMT too. */
7001 if (dump_enabled_p ())
7003 dump_printf_loc (MSG_NOTE
, vect_location
,
7004 "==> examining pattern statement: ");
7005 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7006 dump_printf (MSG_NOTE
, "\n");
7009 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7013 if (is_pattern_stmt_p (stmt_info
)
7015 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7017 gimple_stmt_iterator si
;
7019 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7021 gimple pattern_def_stmt
= gsi_stmt (si
);
7022 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7023 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7025 /* Analyze def stmt of STMT if it's a pattern stmt. */
7026 if (dump_enabled_p ())
7028 dump_printf_loc (MSG_NOTE
, vect_location
,
7029 "==> examining pattern def statement: ");
7030 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7031 dump_printf (MSG_NOTE
, "\n");
7034 if (!vect_analyze_stmt (pattern_def_stmt
,
7035 need_to_vectorize
, node
))
7041 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7043 case vect_internal_def
:
7046 case vect_reduction_def
:
7047 case vect_nested_cycle
:
7048 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7049 || relevance
== vect_used_in_outer_by_reduction
7050 || relevance
== vect_unused_in_scope
));
7053 case vect_induction_def
:
7054 case vect_constant_def
:
7055 case vect_external_def
:
7056 case vect_unknown_def_type
:
7063 gcc_assert (PURE_SLP_STMT (stmt_info
));
7065 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7066 if (dump_enabled_p ())
7068 dump_printf_loc (MSG_NOTE
, vect_location
,
7069 "get vectype for scalar type: ");
7070 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7071 dump_printf (MSG_NOTE
, "\n");
7074 vectype
= get_vectype_for_scalar_type (scalar_type
);
7077 if (dump_enabled_p ())
7079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7080 "not SLPed: unsupported data-type ");
7081 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7083 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7088 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7091 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7092 dump_printf (MSG_NOTE
, "\n");
7095 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7098 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7100 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7101 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7102 || (is_gimple_call (stmt
)
7103 && gimple_call_lhs (stmt
) == NULL_TREE
));
7104 *need_to_vectorize
= true;
7109 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7110 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7111 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7112 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7113 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7114 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7115 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7116 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7117 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7118 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7119 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7120 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7124 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7125 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7126 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7127 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7128 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7129 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7130 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7131 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7132 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7137 if (dump_enabled_p ())
7139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7140 "not vectorized: relevant stmt not ");
7141 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7142 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7143 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7152 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7153 need extra handling, except for vectorizable reductions. */
7154 if (STMT_VINFO_LIVE_P (stmt_info
)
7155 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7156 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7160 if (dump_enabled_p ())
7162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7163 "not vectorized: live stmt not ");
7164 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7165 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7166 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7176 /* Function vect_transform_stmt.
7178 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7181 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7182 bool *grouped_store
, slp_tree slp_node
,
7183 slp_instance slp_node_instance
)
7185 bool is_store
= false;
7186 gimple vec_stmt
= NULL
;
7187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7190 switch (STMT_VINFO_TYPE (stmt_info
))
7192 case type_demotion_vec_info_type
:
7193 case type_promotion_vec_info_type
:
7194 case type_conversion_vec_info_type
:
7195 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7199 case induc_vec_info_type
:
7200 gcc_assert (!slp_node
);
7201 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7205 case shift_vec_info_type
:
7206 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7210 case op_vec_info_type
:
7211 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7215 case assignment_vec_info_type
:
7216 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7220 case load_vec_info_type
:
7221 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7226 case store_vec_info_type
:
7227 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7229 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7231 /* In case of interleaving, the whole chain is vectorized when the
7232 last store in the chain is reached. Store stmts before the last
7233 one are skipped, and their vec_stmt_info shouldn't be freed
7235 *grouped_store
= true;
7236 if (STMT_VINFO_VEC_STMT (stmt_info
))
7243 case condition_vec_info_type
:
7244 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7248 case call_vec_info_type
:
7249 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7250 stmt
= gsi_stmt (*gsi
);
7251 if (is_gimple_call (stmt
)
7252 && gimple_call_internal_p (stmt
)
7253 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7257 case call_simd_clone_vec_info_type
:
7258 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7259 stmt
= gsi_stmt (*gsi
);
7262 case reduc_vec_info_type
:
7263 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7268 if (!STMT_VINFO_LIVE_P (stmt_info
))
7270 if (dump_enabled_p ())
7271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7272 "stmt not supported.\n");
7277 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7278 is being vectorized, but outside the immediately enclosing loop. */
7280 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7281 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7282 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7283 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7284 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7285 || STMT_VINFO_RELEVANT (stmt_info
) ==
7286 vect_used_in_outer_by_reduction
))
7288 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7289 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7290 imm_use_iterator imm_iter
;
7291 use_operand_p use_p
;
7295 if (dump_enabled_p ())
7296 dump_printf_loc (MSG_NOTE
, vect_location
,
7297 "Record the vdef for outer-loop vectorization.\n");
7299 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7300 (to be used when vectorizing outer-loop stmts that use the DEF of
7302 if (gimple_code (stmt
) == GIMPLE_PHI
)
7303 scalar_dest
= PHI_RESULT (stmt
);
7305 scalar_dest
= gimple_assign_lhs (stmt
);
7307 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7309 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7311 exit_phi
= USE_STMT (use_p
);
7312 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7317 /* Handle stmts whose DEF is used outside the loop-nest that is
7318 being vectorized. */
7319 if (STMT_VINFO_LIVE_P (stmt_info
)
7320 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7322 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7327 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7333 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info.  */
7337 vect_remove_stores (gimple first_stmt)
7339 gimple next = first_stmt;
7341 gimple_stmt_iterator next_si;
7345 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7347 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7348 if (is_pattern_stmt_p (stmt_info))
7349 next = STMT_VINFO_RELATED_STMT (stmt_info);
7350 /* Free the attached stmt_vec_info and remove the stmt. */
7351 next_si = gsi_for_stmt (next);
7352 unlink_stmt_vdef (next);
7353 gsi_remove (&next_si, true);
7354 release_defs (next);
7355 free_stmt_vec_info (next);
7361 /* Function new_stmt_vec_info.
7363 Create and initialize a new stmt_vec_info struct for STMT. */
7366 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7367 bb_vec_info bb_vinfo)
7370 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7372 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7373 STMT_VINFO_STMT (res) = stmt;
7374 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7375 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7376 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7377 STMT_VINFO_LIVE_P (res) = false;
7378 STMT_VINFO_VECTYPE (res) = NULL;
7379 STMT_VINFO_VEC_STMT (res) = NULL;
7380 STMT_VINFO_VECTORIZABLE (res) = true;
7381 STMT_VINFO_IN_PATTERN_P (res) = false;
7382 STMT_VINFO_RELATED_STMT (res) = NULL;
7383 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7384 STMT_VINFO_DATA_REF (res) = NULL;
7386 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7387 STMT_VINFO_DR_OFFSET (res) = NULL;
7388 STMT_VINFO_DR_INIT (res) = NULL;
7389 STMT_VINFO_DR_STEP (res) = NULL;
7390 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7392 if (gimple_code (stmt) == GIMPLE_PHI
7393 && is_loop_header_bb_p (gimple_bb (stmt)))
7394 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7396 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7398 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7399 STMT_SLP_TYPE (res) = loop_vect;
7400 GROUP_FIRST_ELEMENT (res) = NULL;
7401 GROUP_NEXT_ELEMENT (res) = NULL;
7402 GROUP_SIZE (res) = 0;
7403 GROUP_STORE_COUNT (res) = 0;
7404 GROUP_GAP (res) = 0;
7405 GROUP_SAME_DR_STMT (res) = NULL;
7411 /* Create a hash table for stmt_vec_info. */
7414 init_stmt_vec_info_vec (void)
7416 gcc_assert (!stmt_vec_info_vec.exists ());
7417 stmt_vec_info_vec.create (50);
7421 /* Free hash table for stmt_vec_info. */
7424 free_stmt_vec_info_vec (void)
7428 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7430 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7431 gcc_assert (stmt_vec_info_vec.exists ());
7432 stmt_vec_info_vec.release ();
7436 /* Free stmt vectorization related info. */
7439 free_stmt_vec_info (gimple stmt)
7441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7446 /* Check if this statement has a related "pattern stmt"
7447 (introduced by the vectorizer during the pattern recognition
7448 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7450 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7452 stmt_vec_info patt_info
7453 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7456 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7457 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7458 gimple_set_bb (patt_stmt, NULL);
7459 tree lhs = gimple_get_lhs (patt_stmt);
7460 if (TREE_CODE (lhs) == SSA_NAME)
7461 release_ssa_name (lhs);
7464 gimple_stmt_iterator si;
7465 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7467 gimple seq_stmt = gsi_stmt (si);
7468 gimple_set_bb (seq_stmt, NULL);
7469 lhs = gimple_get_lhs (patt_stmt);
7470 if (TREE_CODE (lhs) == SSA_NAME)
7471 release_ssa_name (lhs);
7472 free_stmt_vec_info (seq_stmt);
7475 free_stmt_vec_info (patt_stmt);
7479 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7480 set_vinfo_for_stmt (stmt, NULL);
7485 /* Function get_vectype_for_scalar_type_and_size.
7487 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target.  */
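/* Editorial example (illustrative only; the result is target dependent):
   on a target whose 128-bit vector mode supports SImode elements,

     tree v = get_vectype_for_scalar_type_and_size (intSI_type_node, 16);

   would be expected to return a 4-element "vector(4) int" type, while
   passing SIZE == 0 lets the target's preferred SIMD mode decide the
   width.  */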
7491 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7493 machine_mode inner_mode = TYPE_MODE (scalar_type);
7494 machine_mode simd_mode;
7495 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7502 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7503 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7506 /* For vector types of elements whose mode precision doesn't
7507 match their type's precision we use an element type of mode
7508 precision. The vectorization routines will have to make sure
7509 they support the proper result truncation/extension.
7510 We also make sure to build vector types with INTEGER_TYPE
7511 component type only. */
7512 if (INTEGRAL_TYPE_P (scalar_type)
7513 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7514 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7515 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7516 TYPE_UNSIGNED (scalar_type));
7518 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7519 When the component mode passes the above test simply use a type
7520 corresponding to that mode. The theory is that any use that
7521 would cause problems with this will disable vectorization anyway. */
7522 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7523 && !INTEGRAL_TYPE_P (scalar_type))
7524 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7526 /* We can't build a vector type of elements with alignment bigger than
7528 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7529 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7530 TYPE_UNSIGNED (scalar_type));
7532 /* If we fell back to using the mode, fail if there was
7533 no scalar type for it. */
7534 if (scalar_type == NULL_TREE)
7537 /* If no size was supplied use the mode the target prefers. Otherwise
7538 lookup a vector mode of the specified size. */
7540 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7542 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7543 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7547 vectype = build_vector_type (scalar_type, nunits);
7549 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7550 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7556 unsigned int current_vector_size;
7558 /* Function get_vectype_for_scalar_type.
7560 Returns the vector type corresponding to SCALAR_TYPE as supported by the target.  */
7564 get_vectype_for_scalar_type (tree scalar_type)
7567 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7568 current_vector_size);
7570 && current_vector_size == 0)
7571 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7575 /* Function get_same_sized_vectype
7577 Returns a vector type corresponding to SCALAR_TYPE of size
7578 VECTOR_TYPE if supported by the target. */
7581 get_same_sized_vectype (tree scalar_type, tree vector_type)
7583 return get_vectype_for_scalar_type_and_size
7584 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7587 /* Function vect_is_simple_use.
7590 LOOP_VINFO - the vect info of the loop that is being vectorized.
7591 BB_VINFO - the vect info of the basic block that is being vectorized.
7592 OPERAND - operand of STMT in the loop or bb.
7593 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7595 Returns whether a stmt with OPERAND can be vectorized.
7596 For loops, supportable operands are constants, loop invariants, and operands
7597 that are defined by the current iteration of the loop. Unsupportable
7598 operands are those that are defined by a previous iteration of the loop (as
7599 is the case in reduction/induction computations).
7600 For basic blocks, supportable operands are constants and bb invariants.
7601 For now, operands defined outside the basic block are not supported. */
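/* Editorial example of the classification (assumed loop, not an exact
   trace): in

     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 1;

   the constant 1 is vect_constant_def, c (defined before the loop) is
   vect_external_def, the value loaded from b[i] is vect_internal_def,
   and a reduction accumulator carried by a loop PHI would instead get
   one of the reduction/induction def types.  */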
7604 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7605 bb_vec_info bb_vinfo, gimple *def_stmt,
7606 tree *def, enum vect_def_type *dt)
7609 stmt_vec_info stmt_vinfo;
7610 struct loop *loop = NULL;
7613 loop = LOOP_VINFO_LOOP (loop_vinfo);
7618 if (dump_enabled_p ())
7620 dump_printf_loc (MSG_NOTE, vect_location,
7621 "vect_is_simple_use: operand ");
7622 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7623 dump_printf (MSG_NOTE, "\n");
7626 if (CONSTANT_CLASS_P (operand))
7628 *dt = vect_constant_def;
7632 if (is_gimple_min_invariant (operand))
7635 *dt = vect_external_def;
7639 if (TREE_CODE (operand) == PAREN_EXPR)
7641 if (dump_enabled_p ())
7642 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7643 operand = TREE_OPERAND (operand, 0);
7646 if (TREE_CODE (operand) != SSA_NAME)
7648 if (dump_enabled_p ())
7649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7654 *def_stmt = SSA_NAME_DEF_STMT (operand);
7655 if (*def_stmt == NULL)
7657 if (dump_enabled_p ())
7658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7663 if (dump_enabled_p ())
7665 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7667 dump_printf (MSG_NOTE, "\n");
7670 /* Empty stmt is expected only in case of a function argument.
7671 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7672 if (gimple_nop_p (*def_stmt))
7675 *dt = vect_external_def;
7679 bb = gimple_bb (*def_stmt);
7681 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7682 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7683 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7684 *dt = vect_external_def;
7687 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7688 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7691 if (*dt == vect_unknown_def_type
7693 && *dt == vect_double_reduction_def
7694 && gimple_code (stmt) != GIMPLE_PHI))
7696 if (dump_enabled_p ())
7697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7698 "Unsupported pattern.\n");
7702 if (dump_enabled_p ())
7703 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7705 switch (gimple_code (*def_stmt))
7708 *def = gimple_phi_result (*def_stmt);
7712 *def = gimple_assign_lhs (*def_stmt);
7716 *def = gimple_call_lhs (*def_stmt);
7721 if (dump_enabled_p ())
7722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7723 "unsupported defining stmt:\n");
7730 /* Function vect_is_simple_use_1.
7732 Same as vect_is_simple_use but also determines the vector operand
7733 type of OPERAND and stores it to *VECTYPE. If the definition of
7734 OPERAND is vect_uninitialized_def, vect_constant_def or
7735 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7736 is responsible to compute the best suited vector type for the scalar operand.  */
7740 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7741 bb_vec_info bb_vinfo, gimple *def_stmt,
7742 tree *def, enum vect_def_type *dt, tree *vectype)
7744 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7748 /* Now get a vector type if the def is internal, otherwise supply
7749 NULL_TREE and leave it up to the caller to figure out a proper
7750 type for the use stmt. */
7751 if (*dt == vect_internal_def
7752 || *dt == vect_induction_def
7753 || *dt == vect_reduction_def
7754 || *dt == vect_double_reduction_def
7755 || *dt == vect_nested_cycle)
7757 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7759 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7760 && !STMT_VINFO_RELEVANT (stmt_info)
7761 && !STMT_VINFO_LIVE_P (stmt_info))
7762 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7764 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7765 gcc_assert (*vectype != NULL_TREE);
7767 else if (*dt == vect_uninitialized_def
7768 || *dt == vect_constant_def
7769 || *dt == vect_external_def)
7770 *vectype = NULL_TREE;
7778 /* Function supportable_widening_operation
7780 Check whether an operation represented by the code CODE is a
7781 widening operation that is supported by the target platform in
7782 vector form (i.e., when operating on arguments of type VECTYPE_IN
7783 producing a result of type VECTYPE_OUT).
7785 Widening operations we currently support are NOP (CONVERT), FLOAT
7786 and WIDEN_MULT. This function checks if these operations are supported
7787 by the target platform either directly (via vector tree-codes), or via
7791 - CODE1 and CODE2 are codes of vector operations to be used when
7792 vectorizing the operation, if available.
7793 - MULTI_STEP_CVT determines the number of required intermediate steps in
7794 case of multi-step conversion (like char->short->int - in that case
7795 MULTI_STEP_CVT will be 1).
7796 - INTERM_TYPES contains the intermediate type required to perform the
7797 widening operation (short in the above example). */
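/* Editorial example (assumed target support, illustrative only):
   widening a QImode multiplication to HImode results could be expressed
   directly as

     CODE1 = VEC_WIDEN_MULT_LO_EXPR, CODE2 = VEC_WIDEN_MULT_HI_EXPR

   whereas widening char -> int needs an intermediate short step, so
   MULTI_STEP_CVT would be 1 and INTERM_TYPES would hold the short
   vector type.  */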
7800 supportable_widening_operation (enum tree_code code
, gimple stmt
,
7801 tree vectype_out
, tree vectype_in
,
7802 enum tree_code
*code1
, enum tree_code
*code2
,
7803 int *multi_step_cvt
,
7804 vec
<tree
> *interm_types
)
7806 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7807 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7808 struct loop
*vect_loop
= NULL
;
7809 machine_mode vec_mode
;
7810 enum insn_code icode1
, icode2
;
7811 optab optab1
, optab2
;
7812 tree vectype
= vectype_in
;
7813 tree wide_vectype
= vectype_out
;
7814 enum tree_code c1
, c2
;
7816 tree prev_type
, intermediate_type
;
7817 machine_mode intermediate_mode
, prev_mode
;
7818 optab optab3
, optab4
;
7820 *multi_step_cvt
= 0;
7822 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
7826 case WIDEN_MULT_EXPR
:
7827 /* The result of a vectorized widening operation usually requires
7828 two vectors (because the widened results do not fit into one vector).
7829 The generated vector results would normally be expected to be
7830 generated in the same order as in the original scalar computation,
7831 i.e. if 8 results are generated in each vector iteration, they are
7832 to be organized as follows:
7833 vect1: [res1,res2,res3,res4],
7834 vect2: [res5,res6,res7,res8].
7836 However, in the special case that the result of the widening
7837 operation is used in a reduction computation only, the order doesn't
7838 matter (because when vectorizing a reduction we change the order of
7839 the computation). Some targets can take advantage of this and
7840 generate more efficient code. For example, targets like Altivec,
7841 that support widen_mult using a sequence of {mult_even,mult_odd}
7842 generate the following vectors:
7843 vect1: [res1,res3,res5,res7],
7844 vect2: [res2,res4,res6,res8].
7846 When vectorizing outer-loops, we execute the inner-loop sequentially
7847 (each vectorized inner-loop iteration contributes to VF outer-loop
7848 iterations in parallel). We therefore don't allow to change the
7849 order of the computation in the inner-loop during outer-loop
7851 /* TODO: Another case in which order doesn't *really* matter is when we
7852 widen and then contract again, e.g. (short)((int)x * y >> 8).
7853 Normally, pack_trunc performs an even/odd permute, whereas the
7854 repack from an even/odd expansion would be an interleave, which
7855 would be significantly simpler for e.g. AVX2. */
7856 /* In any case, in order to avoid duplicating the code below, recurse
7857 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7858 are properly set up for the caller. If we fail, we'll continue with
7859 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7861 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
7862 && !nested_in_vect_loop_p (vect_loop
, stmt
)
7863 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
7864 stmt
, vectype_out
, vectype_in
,
7865 code1
, code2
, multi_step_cvt
,
7868 /* Elements in a vector with vect_used_by_reduction property cannot
7869 be reordered if the use chain with this property does not have the
7870 same operation. One such example is s += a * b, where elements
7871 in a and b cannot be reordered. Here we check if the vector defined
7872 by STMT is only directly used in the reduction statement. */
7873 tree lhs
= gimple_assign_lhs (stmt
);
7874 use_operand_p dummy
;
7876 stmt_vec_info use_stmt_info
= NULL
;
7877 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
7878 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
7879 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
7882 c1
= VEC_WIDEN_MULT_LO_EXPR
;
7883 c2
= VEC_WIDEN_MULT_HI_EXPR
;
7886 case VEC_WIDEN_MULT_EVEN_EXPR
:
7887 /* Support the recursion induced just above. */
7888 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
7889 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
7892 case WIDEN_LSHIFT_EXPR
:
7893 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
7894 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
7898 c1
= VEC_UNPACK_LO_EXPR
;
7899 c2
= VEC_UNPACK_HI_EXPR
;
7903 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
7904 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
7907 case FIX_TRUNC_EXPR
:
7908 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7909 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7910 computing the operation. */
7917 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
7919 enum tree_code ctmp
= c1
;
7924 if (code
== FIX_TRUNC_EXPR
)
7926 /* The signedness is determined from output operand. */
7927 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7928 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
7932 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7933 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
7936 if (!optab1
|| !optab2
)
7939 vec_mode
= TYPE_MODE (vectype
);
7940 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
7941 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
7947 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7948 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7951 /* Check if it's a multi-step conversion that can be done using intermediate
7954 prev_type
= vectype
;
7955 prev_mode
= vec_mode
;
7957 if (!CONVERT_EXPR_CODE_P (code
))
7960 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7961 intermediate steps in promotion sequence. We try
7962 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
7964 interm_types
->create (MAX_INTERM_CVT_STEPS
);
7965 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
7967 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
7969 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
7970 TYPE_UNSIGNED (prev_type
));
7971 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
7972 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
7974 if (!optab3
|| !optab4
7975 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
7976 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
7977 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
7978 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
7979 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
7980 == CODE_FOR_nothing
)
7981 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
7982 == CODE_FOR_nothing
))
7985 interm_types
->quick_push (intermediate_type
);
7986 (*multi_step_cvt
)++;
7988 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7989 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7992 prev_type
= intermediate_type
;
7993 prev_mode
= intermediate_mode
;
7996 interm_types
->release ();
8001 /* Function supportable_narrowing_operation
8003 Check whether an operation represented by the code CODE is a
8004 narrowing operation that is supported by the target platform in
8005 vector form (i.e., when operating on arguments of type VECTYPE_IN
8006 and producing a result of type VECTYPE_OUT).
8008 Narrowing operations we currently support are NOP (CONVERT) and
8009 FIX_TRUNC. This function checks if these operations are supported by
8010 the target platform directly via vector tree-codes.
8013 - CODE1 is the code of a vector operation to be used when
8014 vectorizing the operation, if available.
8015 - MULTI_STEP_CVT determines the number of required intermediate steps in
8016 case of multi-step conversion (like int->short->char - in that case
8017 MULTI_STEP_CVT will be 1).
8018 - INTERM_TYPES contains the intermediate type required to perform the
8019 narrowing operation (short in the above example). */
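/* Editorial example (assumed target support, illustrative only):
   narrowing int -> char would typically go through an intermediate
   short vector type,

     V4SI --VEC_PACK_TRUNC_EXPR--> V8HI --VEC_PACK_TRUNC_EXPR--> V16QI

   so *CODE1 would be VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT would be 1 and
   INTERM_TYPES would contain the intermediate short vector type.  */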
8022 supportable_narrowing_operation (enum tree_code code
,
8023 tree vectype_out
, tree vectype_in
,
8024 enum tree_code
*code1
, int *multi_step_cvt
,
8025 vec
<tree
> *interm_types
)
8027 machine_mode vec_mode
;
8028 enum insn_code icode1
;
8029 optab optab1
, interm_optab
;
8030 tree vectype
= vectype_in
;
8031 tree narrow_vectype
= vectype_out
;
8033 tree intermediate_type
;
8034 machine_mode intermediate_mode
, prev_mode
;
8038 *multi_step_cvt
= 0;
8042 c1
= VEC_PACK_TRUNC_EXPR
;
8045 case FIX_TRUNC_EXPR
:
8046 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8050 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8051 tree code and optabs used for computing the operation. */
8058 if (code
== FIX_TRUNC_EXPR
)
8059 /* The signedness is determined from output operand. */
8060 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8062 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8067 vec_mode
= TYPE_MODE (vectype
);
8068 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8073 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8076 /* Check if it's a multi-step conversion that can be done using intermediate
8078 prev_mode
= vec_mode
;
8079 if (code
== FIX_TRUNC_EXPR
)
8080 uns
= TYPE_UNSIGNED (vectype_out
);
8082 uns
= TYPE_UNSIGNED (vectype
);
8084 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8085 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8086 costly than signed. */
8087 if (code
== FIX_TRUNC_EXPR
&& uns
)
8089 enum insn_code icode2
;
8092 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8094 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8095 if (interm_optab
!= unknown_optab
8096 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8097 && insn_data
[icode1
].operand
[0].mode
8098 == insn_data
[icode2
].operand
[0].mode
)
8101 optab1
= interm_optab
;
8106 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8107 intermediate steps in promotion sequence. We try
8108 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8109 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8110 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8112 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8114 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8116 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8119 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8120 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8121 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8122 == CODE_FOR_nothing
))
8125 interm_types
->quick_push (intermediate_type
);
8126 (*multi_step_cvt
)++;
8128 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8131 prev_mode
= intermediate_mode
;
8132 optab1
= interm_optab
;
8135 interm_types
->release ();