/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "plugin-api.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
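
/* A minimal usage sketch of the two costing modes above (the values are
   illustrative and STMT_INFO is assumed to be a valid stmt_vec_info):

     stmt_vector_for_cost body_costs;
     body_costs.create (0);
     // Deferred: the cost is appended to BODY_COSTS for later
     // processing and a preliminary estimate is returned.
     unsigned est = record_stmt_cost (&body_costs, 1, vector_stmt,
				      stmt_info, 0, vect_body);
     // Immediate: with a NULL cost vector the target's cost data is
     // updated right away via add_stmt_cost.
     record_stmt_cost (NULL, 1, vector_stmt, stmt_info, 0, vect_body);  */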
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
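
/* For illustration (the SSA names are made up): a call such as
   read_vector_array (stmt, gsi, x, vect_array_3, 2) emits

     vectx_7 = vect_array_3[2];

   at *GSI and returns vectx_7.  */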
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is out of pattern; if LHS has other uses that are
	     pattern uses, we should mark the stmt itself, and not the pattern
	     stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
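
/* A worked example of the criteria above, for a hypothetical loop:

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;	// has a vdef (stores to memory)
	 s = b[i] * 2;		// s is used after the loop
       }
     ... = s;

   The store is relevant through the vdef check (*relevant becomes
   vect_used_in_scope); the definition of s is live (*live_p becomes true)
   through its use in the loop-exit phi that loop-closed SSA form
   guarantees.  */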
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- STMT = (SSA_NAME) = var
     -2- STMT = var = (SSA_NAME)
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
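
/* Worked example of the doubling described above (illustrative, for the
   demotion mapping of TMP assumed in the loop): a two-step demotion
   (PWR == 1) charges vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3
   vec_promote_demote operations; a third step (PWR == 2) would add
   vect_pow2 (2) = 4 more, doubling the work with each extra step.  */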
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
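
/* E.g. for a group of four interleaved stores, the call on the first
   store of the group returns 4 while the calls on the other three
   return 1, so the group overhead is charged exactly once.  */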
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
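
/* Worked example of the permute count used above (illustrative): for
   GROUP_SIZE == 4 and NCOPIES == 1,

     nstmts = 1 * ceil_log2 (4) * 4 = 8

   vec_perm operations are charged for interleaving the stored vectors.  */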
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
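
/* For illustration (v4si_type is a hypothetical V4SI vector type): a
   call such as

     vect_init_vector (stmt, build_int_cst (intSI_type_node, 5),
		       v4si_type, NULL);

   emits in the loop preheader something like

     cst_1 = {5, 5, 5, 5};

   and returns the SSA name cst_1.  */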
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, "  def_stmt =  ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0   VS1.1
                        VS1.1: vx.1 = memref1   VS1.2
                        VS1.2: vx.2 = memref2   VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ...  VSnew.1
                        VSnew.1: vz1 = vx.1 + ...  VSnew.2
                        VSnew.2: vz2 = vx.2 + ...  VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
						   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
			      gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
				       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
				 &def_stmt, &def, &gather_dt,
				 &gather_off_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "gather index use not simple.");
	  return false;
	}

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
	= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "masked gather with integer mask not supported.");
	  return false;
	}
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
				 ? STMT_VINFO_DR_STEP (stmt_info)
				 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
			   &def_stmt, &def, &dt))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
			       &def_stmt, &def, &dt))
	return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
	vect_model_store_cost (stmt_info, ncopies, false, dt,
			       NULL, NULL, NULL);
      else
	vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
			   && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
	modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
	  modifier = WIDEN;

	  for (i = 0; i < gather_off_nunits; ++i)
	    sel[i] = i | nunits;

	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
	}
      else if (nunits == gather_off_nunits * 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
	  modifier = NARROW;

	  for (i = 0; i < nunits; ++i)
	    sel[i] = i < gather_off_nunits
		     ? i : i + nunits - gather_off_nunits;

	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
	  ncopies *= 2;
	  for (i = 0; i < nunits; ++i)
	    sel[i] = i | gather_off_nunits;
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
	}
      else
	gcc_unreachable ();

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
	{
	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
	{
	  if (modifier == WIDEN && (j & 1))
	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				       perm_mask, stmt, gsi);
	  else if (j == 0)
	    op = vec_oprnd0
	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
	  else
	    op = vec_oprnd0
	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
			  == TYPE_VECTOR_SUBPARTS (idxtype));
	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
	      var = make_ssa_name (var);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt
		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      op = var;
	    }

	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      else
		{
		  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
		}

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
			      == TYPE_VECTOR_SUBPARTS (masktype));
		  var = vect_get_new_vect_var (masktype, vect_simple_var,
					       NULL);
		  var = make_ssa_name (var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  new_stmt
		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mask_op = var;
		}
	    }

	  new_stmt
	    = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
				 scale);

	  if (!useless_type_conversion_p (vectype, rettype))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
			  == TYPE_VECTOR_SUBPARTS (rettype));
	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
	      op = make_ssa_name (var, new_stmt);
	      gimple_call_set_lhs (new_stmt, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      var = make_ssa_name (vec_dest);
	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	    }
	  else
	    {
	      var = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, var);
	    }

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (modifier == NARROW)
	    {
	      if ((j & 1) == 0)
		{
		  prev_res = var;
		  continue;
		}
	      var = permute_vec_elements (prev_res, var,
					  perm_mask, stmt, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (var);
	    }

	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      tree rhs = gimple_call_arg (stmt, 3);
	      vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      /* We should have caught mismatched types earlier.  */
	      gcc_assert (useless_type_conversion_p (vectype,
						     TREE_TYPE (vec_rhs)));
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false, &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
					  gimple_call_arg (stmt, 1),
					  vec_mask, vec_rhs);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false, &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
), align
,
2167 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2168 gimple_call_arg (stmt
, 1),
2170 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2171 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2173 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2175 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2176 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2182 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2184 tree lhs
= gimple_call_lhs (stmt
);
2185 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2186 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2187 set_vinfo_for_stmt (stmt
, NULL
);
2188 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2189 gsi_replace (gsi
, new_stmt
, true);
2196 /* Function vectorizable_call.
2198 Check if GS performs a function call that can be vectorized.
2199 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2200 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2201 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2204 vectorizable_call (gimple gs
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2211 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2212 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2213 tree vectype_out
, vectype_in
;
2216 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2217 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2218 tree fndecl
, new_temp
, def
, rhs_type
;
2220 enum vect_def_type dt
[3]
2221 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2222 gimple new_stmt
= NULL
;
2224 vec
<tree
> vargs
= vNULL
;
2225 enum { NARROW
, NONE
, WIDEN
} modifier
;
2229 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2232 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2235 /* Is GS a vectorizable call? */
2236 stmt
= dyn_cast
<gcall
*> (gs
);
2240 if (gimple_call_internal_p (stmt
)
2241 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2242 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2243 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2246 if (gimple_call_lhs (stmt
) == NULL_TREE
2247 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2250 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2252 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2254 /* Process function arguments. */
2255 rhs_type
= NULL_TREE
;
2256 vectype_in
= NULL_TREE
;
2257 nargs
= gimple_call_num_args (stmt
);
2259 /* Bail out if the function has more than three arguments, we do not have
2260 interesting builtin functions to vectorize with more than two arguments
2261 except for fma. No arguments is also not good. */
2262 if (nargs
== 0 || nargs
> 3)
2265 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2266 if (gimple_call_internal_p (stmt
)
2267 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2270 rhs_type
= unsigned_type_node
;
2273 for (i
= 0; i
< nargs
; i
++)
2277 op
= gimple_call_arg (stmt
, i
);
2279 /* We can only handle calls with arguments of the same type. */
2281 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2285 "argument types differ.\n");
2289 rhs_type
= TREE_TYPE (op
);
2291 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2292 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2296 "use not simple.\n");
2301 vectype_in
= opvectype
;
2303 && opvectype
!= vectype_in
)
2305 if (dump_enabled_p ())
2306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2307 "argument vector types differ.\n");
2311 /* If all arguments are external or constant defs use a vector type with
2312 the same size as the output vector type. */
2314 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2316 gcc_assert (vectype_in
);
2319 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2322 "no vectype for scalar type ");
2323 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2324 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2331 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2332 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2333 if (nunits_in
== nunits_out
/ 2)
2335 else if (nunits_out
== nunits_in
)
2337 else if (nunits_out
== nunits_in
/ 2)
2342 /* For now, we only vectorize functions if a target specific builtin
2343 is available. TODO -- in some cases, it might be profitable to
2344 insert the calls for pieces of the vector, in order to be able
2345 to vectorize other operations in the loop. */
2346 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2347 if (fndecl
== NULL_TREE
)
2349 if (gimple_call_internal_p (stmt
)
2350 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2353 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2354 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2355 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2356 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2358 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2359 { 0, 1, 2, ... vf - 1 } vector. */
2360 gcc_assert (nargs
== 0);
2364 if (dump_enabled_p ())
2365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2366 "function is not vectorizable.\n");
2371 gcc_assert (!gimple_vuse (stmt
));
2373 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2375 else if (modifier
== NARROW
)
2376 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2378 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2380 /* Sanity check: make sure that at least one copy of the vectorized stmt
2381 needs to be generated. */
2382 gcc_assert (ncopies
>= 1);
2384 if (!vec_stmt
) /* transformation not required. */
2386 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2390 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2400 scalar_dest
= gimple_call_lhs (stmt
);
2401 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2403 prev_stmt_info
= NULL
;
2407 for (j
= 0; j
< ncopies
; ++j
)
2409 /* Build argument list for the vectorized call. */
2411 vargs
.create (nargs
);
2417 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2418 vec
<tree
> vec_oprnds0
;
2420 for (i
= 0; i
< nargs
; i
++)
2421 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2422 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2423 vec_oprnds0
= vec_defs
[0];
2425 /* Arguments are ready. Create the new vector stmt. */
2426 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2429 for (k
= 0; k
< nargs
; k
++)
2431 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2432 vargs
[k
] = vec_oprndsk
[i
];
2434 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2435 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2436 gimple_call_set_lhs (new_stmt
, new_temp
);
2437 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2438 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2441 for (i
= 0; i
< nargs
; i
++)
2443 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2444 vec_oprndsi
.release ();
2449 for (i
= 0; i
< nargs
; i
++)
2451 op
= gimple_call_arg (stmt
, i
);
2454 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2457 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2459 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2462 vargs
.quick_push (vec_oprnd0
);
2465 if (gimple_call_internal_p (stmt
)
2466 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2468 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2470 for (k
= 0; k
< nunits_out
; ++k
)
2471 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2472 tree cst
= build_vector (vectype_out
, v
);
2474 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2475 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2476 new_temp
= make_ssa_name (new_var
, init_stmt
);
2477 gimple_assign_set_lhs (init_stmt
, new_temp
);
2478 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2479 new_temp
= make_ssa_name (vec_dest
);
2480 new_stmt
= gimple_build_assign (new_temp
,
2481 gimple_assign_lhs (init_stmt
));
2485 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2486 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2487 gimple_call_set_lhs (new_stmt
, new_temp
);
2489 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2492 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2494 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2496 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2502 for (j
= 0; j
< ncopies
; ++j
)
2504 /* Build argument list for the vectorized call. */
2506 vargs
.create (nargs
* 2);
2512 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2513 vec
<tree
> vec_oprnds0
;
2515 for (i
= 0; i
< nargs
; i
++)
2516 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2517 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2518 vec_oprnds0
= vec_defs
[0];
2520 /* Arguments are ready. Create the new vector stmt. */
2521 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2525 for (k
= 0; k
< nargs
; k
++)
2527 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2528 vargs
.quick_push (vec_oprndsk
[i
]);
2529 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2531 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2532 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2533 gimple_call_set_lhs (new_stmt
, new_temp
);
2534 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2535 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2538 for (i
= 0; i
< nargs
; i
++)
2540 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2541 vec_oprndsi
.release ();
2546 for (i
= 0; i
< nargs
; i
++)
2548 op
= gimple_call_arg (stmt
, i
);
2552 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2554 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2558 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2560 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2562 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2565 vargs
.quick_push (vec_oprnd0
);
2566 vargs
.quick_push (vec_oprnd1
);
2569 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2570 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2571 gimple_call_set_lhs (new_stmt
, new_temp
);
2572 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2575 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2577 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2579 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2582 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2587 /* No current target implements this case. */
2593 /* The call in STMT might prevent it from being removed in dce.
2594 We however cannot remove it here, due to the way the ssa name
2595 it defines is mapped to the new definition. So just replace
2596 rhs of the statement with something harmless. */
2601 type
= TREE_TYPE (scalar_dest
);
2602 if (is_pattern_stmt_p (stmt_info
))
2603 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2605 lhs
= gimple_call_lhs (stmt
);
2606 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2607 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2608 set_vinfo_for_stmt (stmt
, NULL
);
2609 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2610 gsi_replace (gsi
, new_stmt
, false);
2616 struct simd_call_arg_info
2620 enum vect_def_type dt
;
2621 HOST_WIDE_INT linear_step
;
2625 /* Function vectorizable_simd_clone_call.
2627 Check if STMT performs a function call that can be vectorized
2628 by calling a simd clone of the function.
2629 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2630 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2631 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2634 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2635 gimple
*vec_stmt
, slp_tree slp_node
)
2640 tree vec_oprnd0
= NULL_TREE
;
2641 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2643 unsigned int nunits
;
2644 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2645 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2646 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2647 tree fndecl
, new_temp
, def
;
2649 gimple new_stmt
= NULL
;
2651 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2652 vec
<tree
> vargs
= vNULL
;
2654 tree lhs
, rtype
, ratype
;
2655 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2657 /* Is STMT a vectorizable call? */
2658 if (!is_gimple_call (stmt
))
2661 fndecl
= gimple_call_fndecl (stmt
);
2662 if (fndecl
== NULL_TREE
)
2665 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2666 if (node
== NULL
|| node
->simd_clones
== NULL
)
2669 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2672 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2675 if (gimple_call_lhs (stmt
)
2676 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2679 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2681 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2683 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2687 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2690 /* Process function arguments. */
2691 nargs
= gimple_call_num_args (stmt
);
2693 /* Bail out if the function has zero arguments. */
2697 arginfo
.create (nargs
);
2699 for (i
= 0; i
< nargs
; i
++)
2701 simd_call_arg_info thisarginfo
;
2704 thisarginfo
.linear_step
= 0;
2705 thisarginfo
.align
= 0;
2706 thisarginfo
.op
= NULL_TREE
;
2708 op
= gimple_call_arg (stmt
, i
);
2709 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2710 &def_stmt
, &def
, &thisarginfo
.dt
,
2711 &thisarginfo
.vectype
)
2712 || thisarginfo
.dt
== vect_uninitialized_def
)
2714 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2716 "use not simple.\n");
2721 if (thisarginfo
.dt
== vect_constant_def
2722 || thisarginfo
.dt
== vect_external_def
)
2723 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2725 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2727 /* For linear arguments, the analyze phase should have saved
2728 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2729 if (i
* 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2730 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2])
2732 gcc_assert (vec_stmt
);
2733 thisarginfo
.linear_step
2734 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2]);
2736 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 1];
2737 /* If loop has been peeled for alignment, we need to adjust it. */
2738 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2739 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2742 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2743 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2];
2744 tree opt
= TREE_TYPE (thisarginfo
.op
);
2745 bias
= fold_convert (TREE_TYPE (step
), bias
);
2746 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2748 = fold_build2 (POINTER_TYPE_P (opt
)
2749 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2750 thisarginfo
.op
, bias
);
2754 && thisarginfo
.dt
!= vect_constant_def
2755 && thisarginfo
.dt
!= vect_external_def
2757 && TREE_CODE (op
) == SSA_NAME
2758 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2760 && tree_fits_shwi_p (iv
.step
))
2762 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2763 thisarginfo
.op
= iv
.base
;
2765 else if ((thisarginfo
.dt
== vect_constant_def
2766 || thisarginfo
.dt
== vect_external_def
)
2767 && POINTER_TYPE_P (TREE_TYPE (op
)))
2768 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2770 arginfo
.quick_push (thisarginfo
);
2773 unsigned int badness
= 0;
2774 struct cgraph_node
*bestn
= NULL
;
2775 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2776 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2778 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2779 n
= n
->simdclone
->next_clone
)
2781 unsigned int this_badness
= 0;
2782 if (n
->simdclone
->simdlen
2783 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2784 || n
->simdclone
->nargs
!= nargs
)
2786 if (n
->simdclone
->simdlen
2787 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2788 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2789 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2790 if (n
->simdclone
->inbranch
)
2791 this_badness
+= 2048;
2792 int target_badness
= targetm
.simd_clone
.usable (n
);
2793 if (target_badness
< 0)
2795 this_badness
+= target_badness
* 512;
2796 /* FORNOW: Have to add code to add the mask argument. */
2797 if (n
->simdclone
->inbranch
)
2799 for (i
= 0; i
< nargs
; i
++)
2801 switch (n
->simdclone
->args
[i
].arg_type
)
2803 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2804 if (!useless_type_conversion_p
2805 (n
->simdclone
->args
[i
].orig_type
,
2806 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2808 else if (arginfo
[i
].dt
== vect_constant_def
2809 || arginfo
[i
].dt
== vect_external_def
2810 || arginfo
[i
].linear_step
)
2813 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2814 if (arginfo
[i
].dt
!= vect_constant_def
2815 && arginfo
[i
].dt
!= vect_external_def
)
2818 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2819 if (arginfo
[i
].dt
== vect_constant_def
2820 || arginfo
[i
].dt
== vect_external_def
2821 || (arginfo
[i
].linear_step
2822 != n
->simdclone
->args
[i
].linear_step
))
2825 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2829 case SIMD_CLONE_ARG_TYPE_MASK
:
2832 if (i
== (size_t) -1)
2834 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2839 if (arginfo
[i
].align
)
2840 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2841 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2843 if (i
== (size_t) -1)
2845 if (bestn
== NULL
|| this_badness
< badness
)
2848 badness
= this_badness
;
2858 for (i
= 0; i
< nargs
; i
++)
2859 if ((arginfo
[i
].dt
== vect_constant_def
2860 || arginfo
[i
].dt
== vect_external_def
)
2861 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2864 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2866 if (arginfo
[i
].vectype
== NULL
2867 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2868 > bestn
->simdclone
->simdlen
))
2875 fndecl
= bestn
->decl
;
2876 nunits
= bestn
->simdclone
->simdlen
;
2877 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2879 /* If the function isn't const, only allow it in simd loops where user
2880 has asserted that at least nunits consecutive iterations can be
2881 performed using SIMD instructions. */
2882 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2883 && gimple_vuse (stmt
))
2889 /* Sanity check: make sure that at least one copy of the vectorized stmt
2890 needs to be generated. */
2891 gcc_assert (ncopies
>= 1);
2893 if (!vec_stmt
) /* transformation not required. */
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2896 for (i
= 0; i
< nargs
; i
++)
2897 if (bestn
->simdclone
->args
[i
].arg_type
2898 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2900 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 2
2902 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2903 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2904 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2905 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2906 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2908 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_NOTE
, vect_location
,
2911 "=== vectorizable_simd_clone_call ===\n");
2912 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2919 if (dump_enabled_p ())
2920 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2923 scalar_dest
= gimple_call_lhs (stmt
);
2924 vec_dest
= NULL_TREE
;
2929 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2930 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2931 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2934 rtype
= TREE_TYPE (ratype
);
2938 prev_stmt_info
= NULL
;
2939 for (j
= 0; j
< ncopies
; ++j
)
2941 /* Build argument list for the vectorized call. */
2943 vargs
.create (nargs
);
2947 for (i
= 0; i
< nargs
; i
++)
2949 unsigned int k
, l
, m
, o
;
2951 op
= gimple_call_arg (stmt
, i
);
2952 switch (bestn
->simdclone
->args
[i
].arg_type
)
2954 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2955 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2956 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2957 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2959 if (TYPE_VECTOR_SUBPARTS (atype
)
2960 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2962 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2963 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2964 / TYPE_VECTOR_SUBPARTS (atype
));
2965 gcc_assert ((k
& (k
- 1)) == 0);
2968 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2971 vec_oprnd0
= arginfo
[i
].op
;
2972 if ((m
& (k
- 1)) == 0)
2974 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2977 arginfo
[i
].op
= vec_oprnd0
;
2979 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2981 bitsize_int ((m
& (k
- 1)) * prec
));
2983 = gimple_build_assign (make_ssa_name (atype
),
2985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2986 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2990 k
= (TYPE_VECTOR_SUBPARTS (atype
)
2991 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
2992 gcc_assert ((k
& (k
- 1)) == 0);
2993 vec
<constructor_elt
, va_gc
> *ctor_elts
;
2995 vec_alloc (ctor_elts
, k
);
2998 for (l
= 0; l
< k
; l
++)
3000 if (m
== 0 && l
== 0)
3002 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3005 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3007 arginfo
[i
].op
= vec_oprnd0
;
3010 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3014 vargs
.safe_push (vec_oprnd0
);
3017 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3019 = gimple_build_assign (make_ssa_name (atype
),
3021 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3022 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3027 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3028 vargs
.safe_push (op
);
3030 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3035 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3040 edge pe
= loop_preheader_edge (loop
);
3041 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3042 gcc_assert (!new_bb
);
3044 tree phi_res
= copy_ssa_name (op
);
3045 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3046 set_vinfo_for_stmt (new_phi
,
3047 new_stmt_vec_info (new_phi
, loop_vinfo
,
3049 add_phi_arg (new_phi
, arginfo
[i
].op
,
3050 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3052 = POINTER_TYPE_P (TREE_TYPE (op
))
3053 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3054 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3055 ? sizetype
: TREE_TYPE (op
);
3057 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3059 tree tcst
= wide_int_to_tree (type
, cst
);
3060 tree phi_arg
= copy_ssa_name (op
);
3062 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3063 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3064 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3065 set_vinfo_for_stmt (new_stmt
,
3066 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3068 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3070 arginfo
[i
].op
= phi_res
;
3071 vargs
.safe_push (phi_res
);
3076 = POINTER_TYPE_P (TREE_TYPE (op
))
3077 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3078 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3079 ? sizetype
: TREE_TYPE (op
);
3081 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3083 tree tcst
= wide_int_to_tree (type
, cst
);
3084 new_temp
= make_ssa_name (TREE_TYPE (op
));
3085 new_stmt
= gimple_build_assign (new_temp
, code
,
3086 arginfo
[i
].op
, tcst
);
3087 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3088 vargs
.safe_push (new_temp
);
3091 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3097 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3100 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3102 new_temp
= create_tmp_var (ratype
);
3103 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3104 == TYPE_VECTOR_SUBPARTS (rtype
))
3105 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3107 new_temp
= make_ssa_name (rtype
, new_stmt
);
3108 gimple_call_set_lhs (new_stmt
, new_temp
);
3110 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3114 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3117 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3118 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3119 gcc_assert ((k
& (k
- 1)) == 0);
3120 for (l
= 0; l
< k
; l
++)
3125 t
= build_fold_addr_expr (new_temp
);
3126 t
= build2 (MEM_REF
, vectype
, t
,
3127 build_int_cst (TREE_TYPE (t
),
3128 l
* prec
/ BITS_PER_UNIT
));
3131 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3132 size_int (prec
), bitsize_int (l
* prec
));
3134 = gimple_build_assign (make_ssa_name (vectype
), t
);
3135 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3136 if (j
== 0 && l
== 0)
3137 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3139 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3141 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3146 tree clobber
= build_constructor (ratype
, NULL
);
3147 TREE_THIS_VOLATILE (clobber
) = 1;
3148 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3149 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3153 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3155 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3156 / TYPE_VECTOR_SUBPARTS (rtype
));
3157 gcc_assert ((k
& (k
- 1)) == 0);
3158 if ((j
& (k
- 1)) == 0)
3159 vec_alloc (ret_ctor_elts
, k
);
3162 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3163 for (m
= 0; m
< o
; m
++)
3165 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3166 size_int (m
), NULL_TREE
, NULL_TREE
);
3168 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3169 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3170 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3171 gimple_assign_lhs (new_stmt
));
3173 tree clobber
= build_constructor (ratype
, NULL
);
3174 TREE_THIS_VOLATILE (clobber
) = 1;
3175 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3176 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3179 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3180 if ((j
& (k
- 1)) != k
- 1)
3182 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3184 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3187 if ((unsigned) j
== k
- 1)
3188 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3190 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3192 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3197 tree t
= build_fold_addr_expr (new_temp
);
3198 t
= build2 (MEM_REF
, vectype
, t
,
3199 build_int_cst (TREE_TYPE (t
), 0));
3201 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3202 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3203 tree clobber
= build_constructor (ratype
, NULL
);
3204 TREE_THIS_VOLATILE (clobber
) = 1;
3205 vect_finish_stmt_generation (stmt
,
3206 gimple_build_assign (new_temp
,
3212 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3214 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3216 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3221 /* The call in STMT might prevent it from being removed in dce.
3222 We however cannot remove it here, due to the way the ssa name
3223 it defines is mapped to the new definition. So just replace
3224 rhs of the statement with something harmless. */
3231 type
= TREE_TYPE (scalar_dest
);
3232 if (is_pattern_stmt_p (stmt_info
))
3233 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3235 lhs
= gimple_call_lhs (stmt
);
3236 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3239 new_stmt
= gimple_build_nop ();
3240 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3241 set_vinfo_for_stmt (stmt
, NULL
);
3242 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3243 gsi_replace (gsi
, new_stmt
, true);
3244 unlink_stmt_vdef (stmt
);
3250 /* Function vect_gen_widened_results_half
3252 Create a vector stmt whose code, type, number of arguments, and result
3253 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3254 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3255 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3256 needs to be created (DECL is a function-decl of a target-builtin).
3257 STMT is the original scalar stmt that we are vectorizing. */
3260 vect_gen_widened_results_half (enum tree_code code
,
3262 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3263 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3269 /* Generate half of the widened result: */
3270 if (code
== CALL_EXPR
)
3272 /* Target specific support */
3273 if (op_type
== binary_op
)
3274 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3276 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3277 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3278 gimple_call_set_lhs (new_stmt
, new_temp
);
3282 /* Generic support */
3283 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3284 if (op_type
!= binary_op
)
3286 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3287 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3288 gimple_assign_set_lhs (new_stmt
, new_temp
);
3290 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3296 /* Get vectorized definitions for loop-based vectorization. For the first
3297 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3298 scalar operand), and for the rest we get a copy with
3299 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3300 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3301 The vectors are collected into VEC_OPRNDS. */
3304 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3305 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3309 /* Get first vector operand. */
3310 /* All the vector operands except the very first one (that is scalar oprnd)
3312 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3313 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3315 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3317 vec_oprnds
->quick_push (vec_oprnd
);
3319 /* Get second vector operand. */
3320 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3321 vec_oprnds
->quick_push (vec_oprnd
);
3325 /* For conversion in multiple steps, continue to get operands
3328 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3332 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3333 For multi-step conversions store the resulting vectors and call the function
3337 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3338 int multi_step_cvt
, gimple stmt
,
3340 gimple_stmt_iterator
*gsi
,
3341 slp_tree slp_node
, enum tree_code code
,
3342 stmt_vec_info
*prev_stmt_info
)
3345 tree vop0
, vop1
, new_tmp
, vec_dest
;
3347 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3349 vec_dest
= vec_dsts
.pop ();
3351 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3353 /* Create demotion operation. */
3354 vop0
= (*vec_oprnds
)[i
];
3355 vop1
= (*vec_oprnds
)[i
+ 1];
3356 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3357 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3358 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3359 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3362 /* Store the resulting vector for next recursive call. */
3363 (*vec_oprnds
)[i
/2] = new_tmp
;
3366 /* This is the last step of the conversion sequence. Store the
3367 vectors in SLP_NODE or in vector info of the scalar statement
3368 (or in STMT_VINFO_RELATED_STMT chain). */
3370 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3373 if (!*prev_stmt_info
)
3374 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3376 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3378 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3383 /* For multi-step demotion operations we first generate demotion operations
3384 from the source type to the intermediate types, and then combine the
3385 results (stored in VEC_OPRNDS) in demotion operation to the destination
3389 /* At each level of recursion we have half of the operands we had at the
3391 vec_oprnds
->truncate ((i
+1)/2);
3392 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3393 stmt
, vec_dsts
, gsi
, slp_node
,
3394 VEC_PACK_TRUNC_EXPR
,
3398 vec_dsts
.quick_push (vec_dest
);
3402 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3403 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3404 the resulting vectors and call the function recursively. */
3407 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3408 vec
<tree
> *vec_oprnds1
,
3409 gimple stmt
, tree vec_dest
,
3410 gimple_stmt_iterator
*gsi
,
3411 enum tree_code code1
,
3412 enum tree_code code2
, tree decl1
,
3413 tree decl2
, int op_type
)
3416 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3417 gimple new_stmt1
, new_stmt2
;
3418 vec
<tree
> vec_tmp
= vNULL
;
3420 vec_tmp
.create (vec_oprnds0
->length () * 2);
3421 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3423 if (op_type
== binary_op
)
3424 vop1
= (*vec_oprnds1
)[i
];
3428 /* Generate the two halves of promotion operation. */
3429 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3430 op_type
, vec_dest
, gsi
, stmt
);
3431 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3432 op_type
, vec_dest
, gsi
, stmt
);
3433 if (is_gimple_call (new_stmt1
))
3435 new_tmp1
= gimple_call_lhs (new_stmt1
);
3436 new_tmp2
= gimple_call_lhs (new_stmt2
);
3440 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3441 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3444 /* Store the results for the next step. */
3445 vec_tmp
.quick_push (new_tmp1
);
3446 vec_tmp
.quick_push (new_tmp2
);
3449 vec_oprnds0
->release ();
3450 *vec_oprnds0
= vec_tmp
;
3454 /* Check if STMT performs a conversion operation, that can be vectorized.
3455 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3456 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3457 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3460 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3461 gimple
*vec_stmt
, slp_tree slp_node
)
3465 tree op0
, op1
= NULL_TREE
;
3466 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3467 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3468 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3469 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3470 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3471 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3475 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3476 gimple new_stmt
= NULL
;
3477 stmt_vec_info prev_stmt_info
;
3480 tree vectype_out
, vectype_in
;
3482 tree lhs_type
, rhs_type
;
3483 enum { NARROW
, NONE
, WIDEN
} modifier
;
3484 vec
<tree
> vec_oprnds0
= vNULL
;
3485 vec
<tree
> vec_oprnds1
= vNULL
;
3487 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3488 int multi_step_cvt
= 0;
3489 vec
<tree
> vec_dsts
= vNULL
;
3490 vec
<tree
> interm_types
= vNULL
;
3491 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3493 machine_mode rhs_mode
;
3494 unsigned short fltsz
;
3496 /* Is STMT a vectorizable conversion? */
3498 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3501 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3504 if (!is_gimple_assign (stmt
))
3507 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3510 code
= gimple_assign_rhs_code (stmt
);
3511 if (!CONVERT_EXPR_CODE_P (code
)
3512 && code
!= FIX_TRUNC_EXPR
3513 && code
!= FLOAT_EXPR
3514 && code
!= WIDEN_MULT_EXPR
3515 && code
!= WIDEN_LSHIFT_EXPR
)
3518 op_type
= TREE_CODE_LENGTH (code
);
3520 /* Check types of lhs and rhs. */
3521 scalar_dest
= gimple_assign_lhs (stmt
);
3522 lhs_type
= TREE_TYPE (scalar_dest
);
3523 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3525 op0
= gimple_assign_rhs1 (stmt
);
3526 rhs_type
= TREE_TYPE (op0
);
3528 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3529 && !((INTEGRAL_TYPE_P (lhs_type
)
3530 && INTEGRAL_TYPE_P (rhs_type
))
3531 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3532 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3535 if ((INTEGRAL_TYPE_P (lhs_type
)
3536 && (TYPE_PRECISION (lhs_type
)
3537 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3538 || (INTEGRAL_TYPE_P (rhs_type
)
3539 && (TYPE_PRECISION (rhs_type
)
3540 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3544 "type conversion to/from bit-precision unsupported."
3549 /* Check the operands of the operation. */
3550 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3551 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3555 "use not simple.\n");
3558 if (op_type
== binary_op
)
3562 op1
= gimple_assign_rhs2 (stmt
);
3563 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3564 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3566 if (CONSTANT_CLASS_P (op0
))
3567 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3568 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3570 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3577 "use not simple.\n");
3582 /* If op0 is an external or constant defs use a vector type of
3583 the same size as the output vector type. */
3585 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3587 gcc_assert (vectype_in
);
3590 if (dump_enabled_p ())
3592 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3593 "no vectype for scalar type ");
3594 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3595 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3601 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3602 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3603 if (nunits_in
< nunits_out
)
3605 else if (nunits_out
== nunits_in
)
3610 /* Multiple types in SLP are handled by creating the appropriate number of
3611 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3613 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3615 else if (modifier
== NARROW
)
3616 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3618 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3620 /* Sanity check: make sure that at least one copy of the vectorized stmt
3621 needs to be generated. */
3622 gcc_assert (ncopies
>= 1);
3624 /* Supportable by target? */
3628 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3630 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3635 if (dump_enabled_p ())
3636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3637 "conversion not supported by target.\n");
3641 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3642 &code1
, &code2
, &multi_step_cvt
,
3645 /* Binary widening operation can only be supported directly by the
3647 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3651 if (code
!= FLOAT_EXPR
3652 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3653 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3656 rhs_mode
= TYPE_MODE (rhs_type
);
3657 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3658 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3659 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3660 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3663 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3664 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3665 if (cvt_type
== NULL_TREE
)
3668 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3670 if (!supportable_convert_operation (code
, vectype_out
,
3671 cvt_type
, &decl1
, &codecvt1
))
3674 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3675 cvt_type
, &codecvt1
,
3676 &codecvt2
, &multi_step_cvt
,
3680 gcc_assert (multi_step_cvt
== 0);
3682 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3683 vectype_in
, &code1
, &code2
,
3684 &multi_step_cvt
, &interm_types
))
3688 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3691 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3692 codecvt2
= ERROR_MARK
;
3696 interm_types
.safe_push (cvt_type
);
3697 cvt_type
= NULL_TREE
;
3702 gcc_assert (op_type
== unary_op
);
3703 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3704 &code1
, &multi_step_cvt
,
3708 if (code
!= FIX_TRUNC_EXPR
3709 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3710 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3713 rhs_mode
= TYPE_MODE (rhs_type
);
3715 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3716 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3717 if (cvt_type
== NULL_TREE
)
3719 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3722 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3723 &code1
, &multi_step_cvt
,
3732 if (!vec_stmt
) /* transformation not required. */
3734 if (dump_enabled_p ())
3735 dump_printf_loc (MSG_NOTE
, vect_location
,
3736 "=== vectorizable_conversion ===\n");
3737 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3739 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3740 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3742 else if (modifier
== NARROW
)
3744 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3745 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3749 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3750 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3752 interm_types
.release ();
3757 if (dump_enabled_p ())
3758 dump_printf_loc (MSG_NOTE
, vect_location
,
3759 "transform conversion. ncopies = %d.\n", ncopies
);
3761 if (op_type
== binary_op
)
3763 if (CONSTANT_CLASS_P (op0
))
3764 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3765 else if (CONSTANT_CLASS_P (op1
))
3766 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3769 /* In case of multi-step conversion, we first generate conversion operations
3770 to the intermediate types, and then from that types to the final one.
3771 We create vector destinations for the intermediate type (TYPES) received
3772 from supportable_*_operation, and store them in the correct order
3773 for future use in vect_create_vectorized_*_stmts (). */
3774 vec_dsts
.create (multi_step_cvt
+ 1);
3775 vec_dest
= vect_create_destination_var (scalar_dest
,
3776 (cvt_type
&& modifier
== WIDEN
)
3777 ? cvt_type
: vectype_out
);
3778 vec_dsts
.quick_push (vec_dest
);
3782 for (i
= interm_types
.length () - 1;
3783 interm_types
.iterate (i
, &intermediate_type
); i
--)
3785 vec_dest
= vect_create_destination_var (scalar_dest
,
3787 vec_dsts
.quick_push (vec_dest
);
3792 vec_dest
= vect_create_destination_var (scalar_dest
,
3794 ? vectype_out
: cvt_type
);
3798 if (modifier
== WIDEN
)
3800 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3801 if (op_type
== binary_op
)
3802 vec_oprnds1
.create (1);
3804 else if (modifier
== NARROW
)
3805 vec_oprnds0
.create (
3806 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3808 else if (code
== WIDEN_LSHIFT_EXPR
)
3809 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3812 prev_stmt_info
= NULL
;
3816 for (j
= 0; j
< ncopies
; j
++)
3819 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3822 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3824 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3826 /* Arguments are ready, create the new vector stmt. */
3827 if (code1
== CALL_EXPR
)
3829 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3830 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3831 gimple_call_set_lhs (new_stmt
, new_temp
);
3835 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3836 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3837 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3838 gimple_assign_set_lhs (new_stmt
, new_temp
);
3841 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3843 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3846 if (!prev_stmt_info
)
3847 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3849 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3850 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3857 /* In case the vectorization factor (VF) is bigger than the number
3858 of elements that we can fit in a vectype (nunits), we have to
3859 generate more than one vector stmt - i.e - we need to "unroll"
3860 the vector stmt by a factor VF/nunits. */
3861 for (j
= 0; j
< ncopies
; j
++)
3868 if (code
== WIDEN_LSHIFT_EXPR
)
3873 /* Store vec_oprnd1 for every vector stmt to be created
3874 for SLP_NODE. We check during the analysis that all
3875 the shift arguments are the same. */
3876 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3877 vec_oprnds1
.quick_push (vec_oprnd1
);
3879 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3883 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3884 &vec_oprnds1
, slp_node
, -1);
3888 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3889 vec_oprnds0
.quick_push (vec_oprnd0
);
3890 if (op_type
== binary_op
)
3892 if (code
== WIDEN_LSHIFT_EXPR
)
3895 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3897 vec_oprnds1
.quick_push (vec_oprnd1
);
3903 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3904 vec_oprnds0
.truncate (0);
3905 vec_oprnds0
.quick_push (vec_oprnd0
);
3906 if (op_type
== binary_op
)
3908 if (code
== WIDEN_LSHIFT_EXPR
)
3911 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3913 vec_oprnds1
.truncate (0);
3914 vec_oprnds1
.quick_push (vec_oprnd1
);
3918 /* Arguments are ready. Create the new vector stmts. */
3919 for (i
= multi_step_cvt
; i
>= 0; i
--)
3921 tree this_dest
= vec_dsts
[i
];
3922 enum tree_code c1
= code1
, c2
= code2
;
3923 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3928 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3930 stmt
, this_dest
, gsi
,
3931 c1
, c2
, decl1
, decl2
,
3935 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3939 if (codecvt1
== CALL_EXPR
)
3941 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3942 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3943 gimple_call_set_lhs (new_stmt
, new_temp
);
3947 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3948 new_temp
= make_ssa_name (vec_dest
);
3949 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
3953 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3956 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3959 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3962 if (!prev_stmt_info
)
3963 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3965 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3966 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3971 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3975 /* In case the vectorization factor (VF) is bigger than the number
3976 of elements that we can fit in a vectype (nunits), we have to
3977 generate more than one vector stmt - i.e - we need to "unroll"
3978 the vector stmt by a factor VF/nunits. */
3979 for (j
= 0; j
< ncopies
; j
++)
3983 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3987 vec_oprnds0
.truncate (0);
3988 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3989 vect_pow2 (multi_step_cvt
) - 1);
3992 /* Arguments are ready. Create the new vector stmts. */
3994 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3996 if (codecvt1
== CALL_EXPR
)
3998 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3999 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4000 gimple_call_set_lhs (new_stmt
, new_temp
);
4004 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4005 new_temp
= make_ssa_name (vec_dest
);
4006 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4010 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4011 vec_oprnds0
[i
] = new_temp
;
4014 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4015 stmt
, vec_dsts
, gsi
,
4020 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4024 vec_oprnds0
.release ();
4025 vec_oprnds1
.release ();
4026 vec_dsts
.release ();
4027 interm_types
.release ();
4033 /* Function vectorizable_assignment.
4035 Check if STMT performs an assignment (copy) that can be vectorized.
4036 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4037 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4038 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4041 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4042 gimple
*vec_stmt
, slp_tree slp_node
)
4047 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4048 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4049 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4053 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4054 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4057 vec
<tree
> vec_oprnds
= vNULL
;
4059 bb_vec_info bb_vinfo
    = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
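/* For example (an illustrative sketch in pseudo-gimple, not generated
   verbatim): with V4SI vectors, the assignment

       S:  b_1 = (unsigned int) a_2;

   is a NOP conversion that changes neither the element count nor the
   vector size, so it is vectorized by the code above as a single
   VIEW_CONVERT_EXPR copy:

       VS: vb_1 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(va_2);  */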
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
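/* Usage sketch (illustration only): a caller such as pattern recognition
   can ask whether a shift is vectorizable for a scalar type at all
   before committing to a pattern that needs one:

       if (vect_supportable_shift (RSHIFT_EXPR, integer_type_node))
         ... emit the shift in the generated pattern ...

   The probe tries the vector/scalar optab first and falls back to the
   vector/vector one, mirroring the checks done in vectorizable_shift.  */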
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.\n");
      return false;
    }
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.\n");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.\n");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.\n");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.\n");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
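/* A sketch of the output (pseudo-gimple, illustration only): for

       S: x_1 = y_2 << 3;

   a target with a vector/scalar shift insn gets

       VS: vx_1 = vy_2 << 3;

   while a target with only a vector/vector insn first materializes the
   invariant count via vect_init_vector,

       cst_3 = { 3, 3, 3, 3 };
       VS:    vx_1 = vy_2 << cst_3;  */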
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
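/* For a ternary operation (illustration only, pseudo-gimple): vectorizing

       S: x_1 = FMA_EXPR <a_2, b_3, c_4>;

   gathers one vector def per operand and emits

       VS: vx_1 = FMA_EXPR <va_2, vb_3, vc_4>;

   with VOP1/VOP2 simply left NULL_TREE in the gimple_build_assign call
   above for unary and binary codes.  */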
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
      else
        {
          DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
          DECL_USER_ALIGN (base_decl) = 1;
        }
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
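/* For example (illustration only): if the loop accesses a file-scope

       double a[256];

   that has only the default 8-byte alignment and the chosen vectype is
   V4DF, the declaration's alignment is raised to TYPE_ALIGN (V4DF)
   (32 bytes, on typical targets) so that aligned vector accesses can be
   used for it.  */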
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
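/* For example: for a V4SI vectype the selector is { 3, 2, 1, 0 }, so the
   returned mask reverses the vector:

       VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>

   yields { v[3], v[2], v[1], v[0] }.  This is what negative-step
   accesses use to load/store elements in the scalar order.  */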
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  bool negative = false;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store? */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (!STMT_VINFO_STRIDED_P (stmt_info))
    {
      negative =
        tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                              ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                              size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }
      if (negative)
        {
          gcc_assert (!grouped_store);
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (dt != vect_constant_def
              && dt != vect_external_def
              && !perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported.\n");
              return false;
            }
        }
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      if (!slp
          && !PURE_SLP_STMT (stmt_info)
          && !STMT_VINFO_STRIDED_P (stmt_info))
        {
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (STMT_VINFO_STRIDED_P (stmt_info)
          && (slp || PURE_SLP_STMT (stmt_info))
          && (group_size > nunits
              || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group store\n");
          return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group. Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.\n");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                               NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;
      tree vec_oprnd;
      unsigned int g;

      gcc_assert (!nested_in_vect_loop_p (loop, stmt));

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (first_dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));

      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
         */

      unsigned nstores = nunits;
      tree ltype = elem_type;
      if (slp)
        {
          nstores = nunits / group_size;
          if (group_size < nunits)
            ltype = build_vector_type (elem_type, group_size);
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          group_size = 1;
        }

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep),
                                           ncopies * nstores));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
      next_stmt = first_stmt;
      for (g = 0; g < group_size; g++)
        {
          running_off = offvar;
          if (g)
            {
              tree size = TYPE_SIZE_UNIT (ltype);
              tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
                                      size);
              tree newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, pos);
              vect_finish_stmt_generation (stmt, incr, gsi);
              running_off = newoff;
            }
          for (j = 0; j < ncopies; j++)
            {
              /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
                 and first_stmt == stmt.  */
              if (j == 0)
                {
                  if (slp)
                    {
                      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
                                         slp_node, -1);
                      vec_oprnd = vec_oprnds[0];
                    }
                  else
                    {
                      gcc_assert (gimple_assign_single_p (next_stmt));
                      op = gimple_assign_rhs1 (next_stmt);
                      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                                NULL);
                    }
                }
              else
                {
                  if (slp)
                    vec_oprnd = vec_oprnds[j];
                  else
                    {
                      vect_is_simple_use (vec_oprnd, NULL, loop_vinfo,
                                          bb_vinfo, &def_stmt, &def, &dt);
                      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
                    }
                }

              for (i = 0; i < nstores; i++)
                {
                  tree newref, newoff;
                  gimple incr, assign;
                  tree size = TYPE_SIZE (ltype);
                  /* Extract the i'th component.  */
                  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
                                          bitsize_int (i), size);
                  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
                                           size, pos);

                  elem = force_gimple_operand_gsi (gsi, elem, true,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);

                  newref = build2 (MEM_REF, ltype,
                                   running_off, alias_off);

                  /* And store it to *running_off.  */
                  assign = gimple_build_assign (newref, elem);
                  vect_finish_stmt_generation (stmt, assign, gsi);

                  newoff = copy_ssa_name (running_off, NULL);
                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                              running_off, stride_step);
                  vect_finish_stmt_generation (stmt, incr, gsi);

                  running_off = newoff;
                  if (g == group_size - 1
                      && !slp)
                    {
                      if (j == 0 && i == 0)
                        STMT_VINFO_VEC_STMT (stmt_info)
                            = *vec_stmt = assign;
                      else
                        STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
                      prev_stmt_info = vinfo_for_stmt (assign);
                    }
                }
            }
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        }
      return true;
    }

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
                                      dataref_ptr,
                                      dataref_offset
                                      ? dataref_offset
                                      : build_int_cst (reference_alias_ptr_type
                                                       (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE
                  && TREE_CODE (dataref_ptr) == SSA_NAME)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (negative
                  && dt != vect_constant_def
                  && dt != vect_external_def)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest);

                  /* Generate the permute statement.  */
                  gimple perm_stmt
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
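/* A sketch of the simplest case (pseudo-gimple, illustration only): a
   unit-stride, non-grouped store

       S: a[i] = x_1;

   becomes, per copy,

       VS: MEM_REF[(int *)vectp_a] = vx_1;

   with vectp_a created by vect_create_data_ref_ptr and bumped by
   TYPE_SIZE_UNIT (vectype) between copies.  */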
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
{
  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
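/* For example: with an 8-element vectype, the selector

       { 0, 8, 1, 9, 2, 10, 3, 11 }

   (as in VS5 of the interleaving example in vectorizable_store above)
   yields the VECTOR_CST mask that interleaves the low halves of two
   input vectors; indices 0-7 pick from the first input and 8-15 from
   the second.  */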
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
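/* For example (pseudo-gimple, illustration only): the call emits

       vect_perm_1 = VEC_PERM_EXPR <vx_2, vy_3, { 3, 2, 1, 0 }>;

   at *GSI and returns vect_perm_1 for the caller to use as the
   permuted operand.  */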
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can then be moved),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
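/* For example (illustration only): to hoist the invariant load

       a_1 = *p_2;   where   p_2 = q_3 + 4;

   is also defined inside LOOP, p_2's definition is moved to the
   preheader edge first, after checking that q_3's own definition is
   not inside the loop (no recursion is attempted for deeper webs).  */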
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size = -1, group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree def;
  gimple def_stmt;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load? */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

      /* If this is single-element interleaving with an element distance
         that leaves unused vector loads around punt - we at least create
         very sub-optimal code in that case (and blow up memory).  */
      if (first_stmt == stmt
          && !GROUP_NEXT_ELEMENT (stmt_info)
          && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "single-element interleaving not supported "
                             "for not adjacent vector loads\n");
          return false;
        }

      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        slp_perm = true;

      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      if (!slp
          && !PURE_SLP_STMT (stmt_info)
          && !STMT_VINFO_STRIDED_P (stmt_info))
        {
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }

      /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
          && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
          && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "cannot perform implicit CSE when performing "
                             "group loads with negative dependence distance\n");
          return false;
        }

      /* Similarly when the stmt is a load that is both part of a SLP
         instance and a loop vectorized stmt via the same-dr mechanism
         we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
          && (STMT_SLP_TYPE (stmt_info)
              != STMT_SLP_TYPE (vinfo_for_stmt
                                 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "conflicting SLP types for CSEd load\n");
          return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      if ((grouped_load
           && (slp || PURE_SLP_STMT (stmt_info)))
          && (group_size > nunits
              || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group load\n");
          return false;
        }
    }
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
                              NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
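      /* For example (illustration only): gathering V4DF data with V8SI
         indices (nunits == 4, gather_off_nunits == 8) is the WIDEN case:
         each gather consumes half of one index vector, and odd copies
         apply the permutation built above to move the high index half
         into place.  Gathering V8SF data with V4DI indices is the NARROW
         case: ncopies is doubled and each pair of gather results is glued
         together with PERM_MASK by permute_vec_elements further below.  */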
6158 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6159 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6160 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6161 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6162 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6163 scaletype
= TREE_VALUE (arglist
);
6164 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6166 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6168 ptr
= fold_convert (ptrtype
, gather_base
);
6169 if (!is_gimple_min_invariant (ptr
))
6171 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6172 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6173 gcc_assert (!new_bb
);
6176 /* Currently we support only unconditional gather loads,
6177 so mask should be all ones. */
6178 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6179 mask
= build_int_cst (masktype
, -1);
6180 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6182 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6183 mask
= build_vector_from_val (masktype
, mask
);
6184 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6186 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6190 for (j
= 0; j
< 6; ++j
)
6192 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6193 mask
= build_real (TREE_TYPE (masktype
), r
);
6194 mask
= build_vector_from_val (masktype
, mask
);
6195 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6200 scale
= build_int_cst (scaletype
, gather_scale
);
6202 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6203 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6204 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6208 for (j
= 0; j
< 6; ++j
)
6210 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6211 merge
= build_real (TREE_TYPE (rettype
), r
);
6215 merge
= build_vector_from_val (rettype
, merge
);
6216 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6218 prev_stmt_info
= NULL
;
6219 for (j
= 0; j
< ncopies
; ++j
)
6221 if (modifier
== WIDEN
&& (j
& 1))
6222 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6223 perm_mask
, stmt
, gsi
);
6226 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
6229 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6231 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6233 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6234 == TYPE_VECTOR_SUBPARTS (idxtype
));
6235 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
6236 var
= make_ssa_name (var
);
6237 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6239 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6240 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6245 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6247 if (!useless_type_conversion_p (vectype
, rettype
))
6249 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6250 == TYPE_VECTOR_SUBPARTS (rettype
));
6251 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6252 op
= make_ssa_name (var
, new_stmt
);
6253 gimple_call_set_lhs (new_stmt
, op
);
6254 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6255 var
= make_ssa_name (vec_dest
);
6256 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6258 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6262 var
= make_ssa_name (vec_dest
, new_stmt
);
6263 gimple_call_set_lhs (new_stmt
, var
);
6266 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6268 if (modifier
== NARROW
)
6275 var
= permute_vec_elements (prev_res
, var
,
6276 perm_mask
, stmt
, gsi
);
6277 new_stmt
= SSA_NAME_DEF_STMT (var
);
6280 if (prev_stmt_info
== NULL
)
6281 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6283 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6284 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
	  else if (STMT_VINFO_STRIDED_P (stmt_info))
	    {
	      gimple_stmt_iterator incr_gsi;
	      bool insert_after;
	      gimple incr;
	      tree offvar;
	      tree ivstep;
	      tree running_off;
	      vec<constructor_elt, va_gc> *v = NULL;
	      gimple_seq stmts = NULL;
	      tree stride_base, stride_step, alias_off;

	      gcc_assert (!nested_in_vect_loop);

	      if (slp && grouped_load)
		first_dr = STMT_VINFO_DATA_REF
		  (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));

	      stride_base
		= fold_build_pointer_plus
		    (DR_BASE_ADDRESS (first_dr),
		     size_binop (PLUS_EXPR,
				 convert_to_ptrofftype (DR_OFFSET (first_dr)),
				 convert_to_ptrofftype (DR_INIT (first_dr))));
	      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

	      /* For a load with loop-invariant (but other than power-of-2)
		 stride (i.e. not a grouped access) like so:

		   for (i = 0; i < n; i += stride)
		     ... = array[i];

		 we generate a new induction variable and new accesses to
		 form a new vector (or vectors, depending on ncopies):

		   for (j = 0; ; j += VF*stride)
		     tmp1 = array[j];
		     tmp2 = array[j + stride];
		     ...
		     vectemp = {tmp1, tmp2, ...}
		     ...  */

	      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step),
				    stride_step,
				    build_int_cst (TREE_TYPE (stride_step),
						   vf));

	      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

	      create_iv (unshare_expr (stride_base), unshare_expr (ivstep),
			 NULL, loop, &incr_gsi, insert_after,
			 &offvar, NULL);
	      incr = gsi_stmt (incr_gsi);
	      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo,
							   NULL));

	      stride_step = force_gimple_operand (unshare_expr (stride_step),
						  &stmts, true, NULL_TREE);
	      if (stmts)
		gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
						  stmts);

	      prev_stmt_info = NULL;
	      running_off = offvar;
	      alias_off = build_int_cst (reference_alias_ptr_type
					   (DR_REF (first_dr)), 0);
	      int nloads = nunits;
	      tree ltype = TREE_TYPE (vectype);
	      auto_vec<tree> dr_chain;
	      if (slp)
		{
		  nloads = nunits / group_size;
		  if (group_size < nunits)
		    ltype = build_vector_type (TREE_TYPE (vectype),
					       group_size);
		  else
		    ltype = vectype;
		  ltype = build_aligned_type (ltype,
					      TYPE_ALIGN (TREE_TYPE (vectype)));
		  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
		  if (slp_perm)
		    dr_chain.create (ncopies);
		}
	      for (j = 0; j < ncopies; j++)
		{
		  tree vec_inv;

		  if (nloads > 1)
		    {
		      vec_alloc (v, nloads);
		      for (i = 0; i < nloads; i++)
			{
			  tree newref, newoff;
			  gimple incr;
			  newref = build2 (MEM_REF, ltype, running_off,
					   alias_off);

			  newref = force_gimple_operand_gsi (gsi, newref, true,
							     NULL_TREE, true,
							     GSI_SAME_STMT);
			  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
			  newoff = copy_ssa_name (running_off);
			  incr = gimple_build_assign (newoff,
						      POINTER_PLUS_EXPR,
						      running_off, stride_step);
			  vect_finish_stmt_generation (stmt, incr, gsi);

			  running_off = newoff;
			}

		      vec_inv = build_constructor (vectype, v);
		      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
		    }
		  else
		    {
		      new_stmt = gimple_build_assign (make_ssa_name (ltype),
						      build2 (MEM_REF, ltype,
							      running_off,
							      alias_off));
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);

		      tree newoff = copy_ssa_name (running_off);
		      gimple incr = gimple_build_assign (newoff,
							 POINTER_PLUS_EXPR,
							 running_off,
							 stride_step);
		      vect_finish_stmt_generation (stmt, incr, gsi);

		      running_off = newoff;
		    }

		  if (slp)
		    {
		      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		      if (slp_perm)
			dr_chain.quick_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      if (j == 0)
			STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		      else
			STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		      prev_stmt_info = vinfo_for_stmt (new_stmt);
		    }
		}
	      if (slp_perm)
		vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					      slp_node_instance, false);
	      return true;
	    }
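
	  /* A concrete instance of the strided-load transform above,
	     with illustrative values (VF == 4, stride s, ncopies == 1;
	     not taken from any particular testcase):

	       for (i = 0; i < n; i += s)	  for (j = 0; ; j += 4*s)
		 ... = array[i];	    =>	    tmp0 = array[j];
						    tmp1 = array[j + s];
						    tmp2 = array[j + 2*s];
						    tmp3 = array[j + 3*s];
						    vectemp
						      = {tmp0,tmp1,tmp2,tmp3};

	     Each scalar load uses RUNNING_OFF, which is advanced by
	     STRIDE_STEP via POINTER_PLUS_EXPR after every element.  */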
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
      if (slp
	  && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ???  But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ???  With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      group_gap_adj = 0;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    vec_num = (group_size * vf + nunits - 1) / nunits;
	  else
	    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  group_gap_adj = vf * group_size - nunits * vec_num;
	}
      else
	vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap_adj = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
				RELATED_STMT	VEC_STMT
	S1:	x = memref	-		-

     step 1: vectorize stmt S1:
	We first create the vector stmt VS1_0, and, as usual, record a
	pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
	Next, we create the vector stmt VS1_1, and record a pointer to
	it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
	Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
	stmts:
				RELATED_STMT	VEC_STMT
	VS1_0:	vx0 = memref0	VS1_1		-
	VS1_1:	vx1 = memref1	VS1_2		-
	VS1_2:	vx2 = memref2	VS1_3		-
	VS1_3:	vx3 = memref3	-		-
	S1:	x = load	-		VS1_0

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmts that use the defs of the current stmt.  */

  /* In case of interleaving (non-unit grouped access):

	S1:	x2 = &base + 2
	S2:	x0 = &base
	S3:	x1 = &base + 1
	S4:	x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

	VS1: vx0 = &base
	VS2: vx1 = &base + vec_size*1
	VS3: vx3 = &base + vec_size*2
	VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	}
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  byte_offset);
	}
      else if (dataref_offset)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					  TYPE_SIZE_UNIT (aggr_type));
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    data_ref
		      = fold_build2 (MEM_REF, vectype, dataref_ptr,
				     dataref_offset
				     ? dataref_offset
				     : build_int_cst (reference_alias_ptr_type
						      (DR_REF (first_dr)), 0));
		    align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			align = TYPE_ALIGN_UNIT (elem_type);
			misalign = 0;
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			misalign = DR_MISALIGNMENT (first_dr);
		      }
		    if (dataref_offset == NULL_TREE
			&& TREE_CODE (dataref_ptr) == SSA_NAME)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;

		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      ptr = copy_ssa_name (dataref_ptr);
		    else
		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
		    new_stmt = gimple_build_assign
				 (ptr, BIT_AND_EXPR, dataref_ptr,
				  build_int_cst
				    (TREE_TYPE (dataref_ptr),
				     -(HOST_WIDE_INT)
				       TYPE_ALIGN_UNIT (vectype)));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs,
				       TYPE_SIZE_UNIT (elem_type));
		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign
				 (NULL_TREE, BIT_AND_EXPR, ptr,
				  build_int_cst
				    (TREE_TYPE (ptr),
				     -(HOST_WIDE_INT)
				       TYPE_ALIGN_UNIT (vectype)));
		    ptr = copy_ssa_name (ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  if (TREE_CODE (dataref_ptr) == SSA_NAME)
		    new_temp = copy_ssa_name (dataref_ptr);
		  else
		    new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
		  new_stmt = gimple_build_assign
			       (new_temp, BIT_AND_EXPR, dataref_ptr,
				build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT)
				     TYPE_ALIGN_UNIT (vectype)));
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest,
							  vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme
		      == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}

	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gcc_assert (!grouped_load);
		  /* If we have versioned for aliasing or the loop doesn't
		     have any data dependencies that would preclude this,
		     then we are sure this is a loop invariant load and
		     thus we can insert it on the preheader edge.  */
		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt, loop))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "hoisting out of the vectorized "
					   "loop: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
			}
		      tree tem = copy_ssa_name (scalar_dest);
		      gsi_insert_on_edge_immediate
			(loop_preheader_edge (loop),
			 gimple_build_assign (tem,
					      unshare_expr
						(gimple_assign_rhs1 (stmt))));
		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
		    }
		  else
		    {
		      gimple_stmt_iterator gsi2 = *gsi;
		      gsi_next (&gsi2);
		      new_temp = vect_init_vector (stmt, scalar_dest,
						   vectype, &gsi2);
		    }
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo,
							 bb_vinfo));
		}

	      if (negative)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }
	  /* Bump the vector pointer to account for a gap or for excess
	     elements loaded for a permuted SLP load.  */
	  if (group_gap_adj != 0)
	    {
	      bool ovf;
	      tree bump
		= wide_int_to_tree (sizetype,
				    wi::smul (TYPE_SIZE_UNIT (elem_type),
					      group_gap_adj, &ovf));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (!load_lanes_p)
		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      dr_chain.release ();
    }

  return true;
}
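
/* Illustration of the gather-load path handled earlier in this function
   (a minimal sketch with assumed names; the exact builtin is
   target-specific).  A scalar loop such as

       for (i = 0; i < n; ++i)
	 ... = b[idx[i]];

   is lowered, per copy J, to a call with the five operands built above:

       vect_ret_J = GATHER_DECL (merge, ptr, vect_idx_J, mask, scale);

   with VIEW_CONVERT_EXPRs inserted around the index and return values
   whenever the builtin's vector types differ from the vectypes the
   vectorizer selected.  */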
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
		     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				 &lhs_def_stmt, &def, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
				 &rhs_def_stmt, &def, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
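
/* Example of a condition this predicate accepts (illustrative SSA names
   assumed): for  _1 = a_2 < b_3 ? x_4 : y_5;  the tree  a_2 < b_3  is a
   COMPARISON_CLASS_P node whose operands are either loop constants or
   SSA names that pass vect_is_simple_use_1, so *COMP_VECTYPE is taken
   from whichever operand has a recorded vectype.  */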
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
			    &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      ops.safe_push (TREE_OPERAND (cond_expr, 0));
	      ops.safe_push (TREE_OPERAND (cond_expr, 1));
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();

	      ops.release ();
	      vec_defs.release ();
	    }
	  else
	    {
	      gimple gtemp;
	      vec_cond_lhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

	      vec_cond_rhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause
		    = vect_get_vec_def_for_operand (then_clause,
						    stmt, NULL);
		  vect_is_simple_use (then_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause
		    = vect_get_vec_def_for_operand (else_clause,
						    stmt, NULL);
		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_oprnds0.pop ());
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_oprnds1.pop ());
	  vec_then_clause
	    = vect_get_vec_def_for_stmt_copy (dts[2],
					      vec_oprnds2.pop ());
	  vec_else_clause
	    = vect_get_vec_def_for_stmt_copy (dts[3],
					      vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_cond_rhs = vec_oprnds1[i];
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
				vec_cond_lhs, vec_cond_rhs);
	  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
				  vec_compare, vec_then_clause,
				  vec_else_clause);

	  new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
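
/* The transform above turns a scalar conditional assignment
   (illustrative SSA names assumed):

       x_1 = a_2 < b_3 ? c_4 : d_5;

   into one vector statement per copy, with the comparison rebuilt in
   the signed comparison vectype VEC_CMP_TYPE:

       vect_x_6 = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;  */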
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized. In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gimple old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and there vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    {
      if (PURE_SLP_STMT (stmt_info))
	gcc_assert (!old_vec_stmt && !vec_stmt
		    && !STMT_VINFO_VEC_STMT (stmt_info));
      else if (HYBRID_SLP_STMT (stmt_info))
	gcc_assert (!vec_stmt
		    && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info. */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}

/* Free hash table for stmt_vec_info. */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type
      = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
					TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.   Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

unsigned int current_vector_size;
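
/* Example (illustrative; assumes a target whose preferred SIMD mode
   for SImode is V4SImode): a request for scalar type "int" with SIZE
   of 0 yields vector(4) int via preferred_simd_mode, whereas an
   explicit SIZE of 32 bytes looks up an 8-element SImode vector mode
   through mode_for_vector instead.  */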
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *def = NULL_TREE;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  basic_block bb = gimple_bb (*def_stmt);
  if ((loop_vinfo
       && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
      || (bb_vinfo
	  && (bb != BB_VINFO_BB (bb_vinfo)
	      || gimple_code (*def_stmt) == GIMPLE_PHI)))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
	*dt = vect_external_def;
      else
	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type
      || (stmt
	  && *dt == vect_double_reduction_def
	  && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   use.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
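
/* Typical use (mirroring the call in vect_is_simple_cond above):
   callers pass an operand and receive both its def type and, for
   internal defs, the vectype recorded on the defining statement:

       tree vectype1;
       if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				  &lhs_def_stmt, &def, &dt, &vectype1))
	 return false;
*/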
8147 /* Function supportable_widening_operation
8149 Check whether an operation represented by the code CODE is a
8150 widening operation that is supported by the target platform in
8151 vector form (i.e., when operating on arguments of type VECTYPE_IN
8152 producing a result of type VECTYPE_OUT).
8154 Widening operations we currently support are NOP (CONVERT), FLOAT
8155 and WIDEN_MULT. This function checks if these operations are supported
8156 by the target platform either directly (via vector tree-codes), or via
8160 - CODE1 and CODE2 are codes of vector operations to be used when
8161 vectorizing the operation, if available.
8162 - MULTI_STEP_CVT determines the number of required intermediate steps in
8163 case of multi-step conversion (like char->short->int - in that case
8164 MULTI_STEP_CVT will be 1).
8165 - INTERM_TYPES contains the intermediate type required to perform the
8166 widening operation (short in the above example). */
8169 supportable_widening_operation (enum tree_code code
, gimple stmt
,
8170 tree vectype_out
, tree vectype_in
,
8171 enum tree_code
*code1
, enum tree_code
*code2
,
8172 int *multi_step_cvt
,
8173 vec
<tree
> *interm_types
)
8175 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8176 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8177 struct loop
*vect_loop
= NULL
;
8178 machine_mode vec_mode
;
8179 enum insn_code icode1
, icode2
;
8180 optab optab1
, optab2
;
8181 tree vectype
= vectype_in
;
8182 tree wide_vectype
= vectype_out
;
8183 enum tree_code c1
, c2
;
8185 tree prev_type
, intermediate_type
;
8186 machine_mode intermediate_mode
, prev_mode
;
8187 optab optab3
, optab4
;
8189 *multi_step_cvt
= 0;
8191 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8195 case WIDEN_MULT_EXPR
:
8196 /* The result of a vectorized widening operation usually requires
8197 two vectors (because the widened results do not fit into one vector).
8198 The generated vector results would normally be expected to be
8199 generated in the same order as in the original scalar computation,
8200 i.e. if 8 results are generated in each vector iteration, they are
8201 to be organized as follows:
8202 vect1: [res1,res2,res3,res4],
8203 vect2: [res5,res6,res7,res8].
8205 However, in the special case that the result of the widening
8206 operation is used in a reduction computation only, the order doesn't
8207 matter (because when vectorizing a reduction we change the order of
8208 the computation). Some targets can take advantage of this and
8209 generate more efficient code. For example, targets like Altivec,
8210 that support widen_mult using a sequence of {mult_even,mult_odd}
8211 generate the following vectors:
8212 vect1: [res1,res3,res5,res7],
8213 vect2: [res2,res4,res6,res8].
8215 When vectorizing outer-loops, we execute the inner-loop sequentially
8216 (each vectorized inner-loop iteration contributes to VF outer-loop
8217 iterations in parallel). We therefore don't allow to change the
8218 order of the computation in the inner-loop during outer-loop
8220 /* TODO: Another case in which order doesn't *really* matter is when we
8221 widen and then contract again, e.g. (short)((int)x * y >> 8).
8222 Normally, pack_trunc performs an even/odd permute, whereas the
8223 repack from an even/odd expansion would be an interleave, which
8224 would be significantly simpler for e.g. AVX2. */
8225 /* In any case, in order to avoid duplicating the code below, recurse
8226 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8227 are properly set up for the caller. If we fail, we'll continue with
8228 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8230 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8231 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8232 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8233 stmt
, vectype_out
, vectype_in
,
8234 code1
, code2
, multi_step_cvt
,
8237 /* Elements in a vector with vect_used_by_reduction property cannot
8238 be reordered if the use chain with this property does not have the
8239 same operation. One such an example is s += a * b, where elements
8240 in a and b cannot be reordered. Here we check if the vector defined
8241 by STMT is only directly used in the reduction statement. */
8242 tree lhs
= gimple_assign_lhs (stmt
);
8243 use_operand_p dummy
;
8245 stmt_vec_info use_stmt_info
= NULL
;
8246 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8247 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8248 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8251 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8252 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8255 case VEC_WIDEN_MULT_EVEN_EXPR
:
8256 /* Support the recursion induced just above. */
8257 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8258 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8261 case WIDEN_LSHIFT_EXPR
:
8262 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8263 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8267 c1
= VEC_UNPACK_LO_EXPR
;
8268 c2
= VEC_UNPACK_HI_EXPR
;
8272 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8273 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8276 case FIX_TRUNC_EXPR
:
8277 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8278 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8279 computing the operation. */
8286 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8289 if (code
== FIX_TRUNC_EXPR
)
8291 /* The signedness is determined from output operand. */
8292 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8293 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8297 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8298 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8301 if (!optab1
|| !optab2
)
8304 vec_mode
= TYPE_MODE (vectype
);
8305 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8306 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8312 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8313 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
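  /* For illustration (hypothetical example, not from the original source):
     widening a vector of chars all the way to ints typically takes two
     steps when each unpack only doubles the element width:

         char  -> short : VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR
         short -> int   : VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR

     On success the loop below records the short vector type in
     INTERM_TYPES and leaves *MULTI_STEP_CVT at 1.  */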
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;
    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;
  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
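  /* For illustration (hypothetical example, not from the original source):
     narrowing double to unsigned short in two steps can run the first
     step through the signed conversion, as in

         int t = (int) d;
         unsigned short u = (unsigned short) t;

     because the subsequent packs keep only the low bits, which agree
     with the unsigned conversion for every input whose result is well
     defined.  */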
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
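
/* For illustration (hypothetical caller, not part of this file): a client
   asking whether an int -> char narrowing conversion is supported might do

       enum tree_code code1;
       int steps;
       vec<tree> interm = vNULL;
       bool ok = supportable_narrowing_operation (NOP_EXPR, char_vectype,
                                                  int_vectype, &code1,
                                                  &steps, &interm);

   where char_vectype and int_vectype are the caller's vector types
   (hypothetical names).  If OK and one intermediate step was needed,
   code1 is VEC_PACK_TRUNC_EXPR, steps is 1, and interm holds the short
   vector type used in between; the caller releases interm when done.  */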