/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "hard-reg-set.h"
44 #include "dominance.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-ssa-alias.h"
49 #include "internal-fn.h"
51 #include "gimple-expr.h"
55 #include "gimple-iterator.h"
56 #include "gimplify-me.h"
57 #include "gimple-ssa.h"
59 #include "tree-phinodes.h"
60 #include "ssa-iterators.h"
61 #include "stringpool.h"
62 #include "tree-ssanames.h"
63 #include "tree-ssa-loop-manip.h"
65 #include "tree-ssa-loop.h"
66 #include "tree-scalar-evolution.h"
68 #include "recog.h" /* FIXME: for insn_data */
69 #include "insn-codes.h"
71 #include "diagnostic-core.h"
72 #include "tree-vectorizer.h"
75 #include "plugin-api.h"
80 /* For lang_hooks.types.type_for_mode. */
81 #include "langhooks.h"
83 /* Return the vectorized type for the given statement. */
86 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
88 return STMT_VINFO_VECTYPE (stmt_info
);
91 /* Return TRUE iff the given statement is in an inner loop relative to
92 the loop being vectorized. */
94 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
96 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
97 basic_block bb
= gimple_bb (stmt
);
98 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
104 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
106 return (bb
->loop_father
== loop
->inner
);
109 /* Record the cost of a statement, either by directly informing the
110 target model or by saving it in a vector for later processing.
111 Return a preliminary estimate of the statement's cost. */
114 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
115 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
116 int misalign
, enum vect_cost_model_location where
)
120 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
121 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
122 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
125 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
130 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
131 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
132 void *target_cost_data
;
135 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
137 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
139 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
144 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
149 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
153 /* ARRAY is an array of vectors created by create_vector_array.
154 Return an SSA_NAME for the vector in index N. The reference
155 is part of the vectorization of STMT and the vector is associated
156 with scalar destination SCALAR_DEST. */
159 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
160 tree array
, unsigned HOST_WIDE_INT n
)
162 tree vect_type
, vect
, vect_name
, array_ref
;
165 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
166 vect_type
= TREE_TYPE (TREE_TYPE (array
));
167 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
168 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
169 build_int_cst (size_type_node
, n
),
170 NULL_TREE
, NULL_TREE
);
172 new_stmt
= gimple_build_assign (vect
, array_ref
);
173 vect_name
= make_ssa_name (vect
, new_stmt
);
174 gimple_assign_set_lhs (new_stmt
, vect_name
);
175 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
180 /* ARRAY is an array of vectors created by create_vector_array.
181 Emit code to store SSA_NAME VECT in index N of the array.
182 The store is part of the vectorization of STMT. */
185 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
186 tree array
, unsigned HOST_WIDE_INT n
)
191 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
192 build_int_cst (size_type_node
, n
),
193 NULL_TREE
, NULL_TREE
);
195 new_stmt
= gimple_build_assign (array_ref
, vect
);
196 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
199 /* PTR is a pointer to an array of type TYPE. Return a representation
200 of *PTR. The memory reference replaces those in FIRST_DR
204 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
206 tree mem_ref
, alias_ptr_type
;
208 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
209 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
210 /* Arrays have the same alignment as their type. */
211 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
215 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
217 /* Function vect_mark_relevant.
219 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
222 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
223 enum vect_relevant relevant
, bool live_p
,
224 bool used_in_pattern
)
226 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
227 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
228 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
231 if (dump_enabled_p ())
232 dump_printf_loc (MSG_NOTE
, vect_location
,
233 "mark relevant %d, live %d.\n", relevant
, live_p
);
235 /* If this stmt is an original stmt in a pattern, we might need to mark its
236 related pattern stmt instead of the original stmt. However, such stmts
237 may have their own uses that are not in any pattern, in such cases the
238 stmt itself should be marked. */
239 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
242 if (!used_in_pattern
)
244 imm_use_iterator imm_iter
;
248 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
249 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
251 if (is_gimple_assign (stmt
))
252 lhs
= gimple_assign_lhs (stmt
);
254 lhs
= gimple_call_lhs (stmt
);
256 /* This use is out of pattern use, if LHS has other uses that are
257 pattern uses, we should mark the stmt itself, and not the pattern
259 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
260 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
262 if (is_gimple_debug (USE_STMT (use_p
)))
264 use_stmt
= USE_STMT (use_p
);
266 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
269 if (vinfo_for_stmt (use_stmt
)
270 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
280 /* This is the last stmt in a sequence that was detected as a
281 pattern that can potentially be vectorized. Don't mark the stmt
282 as relevant/live because it's not going to be vectorized.
283 Instead mark the pattern-stmt that replaces it. */
285 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
287 if (dump_enabled_p ())
288 dump_printf_loc (MSG_NOTE
, vect_location
,
289 "last stmt in pattern. don't mark"
290 " relevant/live.\n");
291 stmt_info
= vinfo_for_stmt (pattern_stmt
);
292 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
293 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
294 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
299 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
300 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
301 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
303 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
304 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
306 if (dump_enabled_p ())
307 dump_printf_loc (MSG_NOTE
, vect_location
,
308 "already marked relevant/live.\n");
312 worklist
->safe_push (stmt
);
316 /* Function vect_stmt_relevant_p.
318 Return true if STMT in loop that is represented by LOOP_VINFO is
319 "relevant for vectorization".
321 A stmt is considered "relevant for vectorization" if:
322 - it has uses outside the loop.
323 - it has vdefs (it alters memory).
324 - control stmts in the loop (except for the exit condition).
326 CHECKME: what other side effects would the vectorizer allow? */
329 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
330 enum vect_relevant
*relevant
, bool *live_p
)
332 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
334 imm_use_iterator imm_iter
;
338 *relevant
= vect_unused_in_scope
;
341 /* cond stmt other than loop exit cond. */
342 if (is_ctrl_stmt (stmt
)
343 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
344 != loop_exit_ctrl_vec_info_type
)
345 *relevant
= vect_used_in_scope
;
347 /* changing memory. */
348 if (gimple_code (stmt
) != GIMPLE_PHI
)
349 if (gimple_vdef (stmt
)
350 && !gimple_clobber_p (stmt
))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE
, vect_location
,
354 "vec_stmt_relevant_p: stmt has vdefs.\n");
355 *relevant
= vect_used_in_scope
;
358 /* uses outside the loop. */
359 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
361 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
363 basic_block bb
= gimple_bb (USE_STMT (use_p
));
364 if (!flow_bb_inside_loop_p (loop
, bb
))
366 if (dump_enabled_p ())
367 dump_printf_loc (MSG_NOTE
, vect_location
,
368 "vec_stmt_relevant_p: used out of loop.\n");
370 if (is_gimple_debug (USE_STMT (use_p
)))
373 /* We expect all such uses to be in the loop exit phis
374 (because of loop closed form) */
375 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
376 gcc_assert (bb
== single_exit (loop
)->dest
);
383 return (*live_p
|| *relevant
);
387 /* Function exist_non_indexing_operands_for_use_p
389 USE is one of the uses attached to STMT. Check if USE is
390 used in STMT for anything other than indexing an array. */
393 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
396 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
398 /* USE corresponds to some operand in STMT. If there is no data
399 reference in STMT, then any operand that corresponds to USE
400 is not indexing an array. */
401 if (!STMT_VINFO_DATA_REF (stmt_info
))
404 /* STMT has a data_ref. FORNOW this means that its of one of
408 (This should have been verified in analyze_data_refs).
410 'var' in the second case corresponds to a def, not a use,
411 so USE cannot correspond to any operands that are not used
414 Therefore, all we need to check is if STMT falls into the
415 first case, and whether var corresponds to USE. */
417 if (!gimple_assign_copy_p (stmt
))
419 if (is_gimple_call (stmt
)
420 && gimple_call_internal_p (stmt
))
421 switch (gimple_call_internal_fn (stmt
))
424 operand
= gimple_call_arg (stmt
, 3);
429 operand
= gimple_call_arg (stmt
, 2);
439 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
441 operand
= gimple_assign_rhs1 (stmt
);
442 if (TREE_CODE (operand
) != SSA_NAME
)
453 Function process_use.
456 - a USE in STMT in a loop represented by LOOP_VINFO
457 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
458 that defined USE. This is done by calling mark_relevant and passing it
459 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
460 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
464 Generally, LIVE_P and RELEVANT are used to define the liveness and
465 relevance info of the DEF_STMT of this USE:
466 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
467 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
469 - case 1: If USE is used only for address computations (e.g. array indexing),
470 which does not need to be directly vectorized, then the liveness/relevance
471 of the respective DEF_STMT is left unchanged.
472 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
473 skip DEF_STMT cause it had already been processed.
474 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
475 be modified accordingly.
477 Return true if everything is as expected. Return false otherwise. */
480 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
481 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
484 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
485 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
486 stmt_vec_info dstmt_vinfo
;
487 basic_block bb
, def_bb
;
490 enum vect_def_type dt
;
492 /* case 1: we are only interested in uses that need to be vectorized. Uses
493 that are used for address computation are not considered relevant. */
494 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
497 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
501 "not vectorized: unsupported use in stmt.\n");
505 if (!def_stmt
|| gimple_nop_p (def_stmt
))
508 def_bb
= gimple_bb (def_stmt
);
509 if (!flow_bb_inside_loop_p (loop
, def_bb
))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
516 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
517 DEF_STMT must have already been processed, because this should be the
518 only way that STMT, which is a reduction-phi, was put in the worklist,
519 as there should be no other uses for DEF_STMT in the loop. So we just
520 check that everything is as expected, and we are done. */
521 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
522 bb
= gimple_bb (stmt
);
523 if (gimple_code (stmt
) == GIMPLE_PHI
524 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
525 && gimple_code (def_stmt
) != GIMPLE_PHI
526 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
527 && bb
->loop_father
== def_bb
->loop_father
)
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_NOTE
, vect_location
,
531 "reduc-stmt defining reduc-phi in the same nest.\n");
532 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
533 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
534 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
535 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
536 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
540 /* case 3a: outer-loop stmt defining an inner-loop stmt:
541 outer-loop-header-bb:
547 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
549 if (dump_enabled_p ())
550 dump_printf_loc (MSG_NOTE
, vect_location
,
551 "outer-loop def-stmt defining inner-loop stmt.\n");
555 case vect_unused_in_scope
:
556 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
557 vect_used_in_scope
: vect_unused_in_scope
;
560 case vect_used_in_outer_by_reduction
:
561 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
562 relevant
= vect_used_by_reduction
;
565 case vect_used_in_outer
:
566 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
567 relevant
= vect_used_in_scope
;
570 case vect_used_in_scope
:
578 /* case 3b: inner-loop stmt defining an outer-loop stmt:
579 outer-loop-header-bb:
583 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
585 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE
, vect_location
,
589 "inner-loop def-stmt defining outer-loop stmt.\n");
593 case vect_unused_in_scope
:
594 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
595 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
596 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
599 case vect_used_by_reduction
:
600 relevant
= vect_used_in_outer_by_reduction
;
603 case vect_used_in_scope
:
604 relevant
= vect_used_in_outer
;
612 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
613 is_pattern_stmt_p (stmt_vinfo
));
618 /* Function vect_mark_stmts_to_be_vectorized.
620 Not all stmts in the loop need to be vectorized. For example:
629 Stmt 1 and 3 do not need to be vectorized, because loop control and
630 addressing of vectorized data-refs are handled differently.
632 This pass detects such stmts. */
635 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
637 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
638 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
639 unsigned int nbbs
= loop
->num_nodes
;
640 gimple_stmt_iterator si
;
643 stmt_vec_info stmt_vinfo
;
647 enum vect_relevant relevant
, tmp_relevant
;
648 enum vect_def_type def_type
;
650 if (dump_enabled_p ())
651 dump_printf_loc (MSG_NOTE
, vect_location
,
652 "=== vect_mark_stmts_to_be_vectorized ===\n");
654 auto_vec
<gimple
, 64> worklist
;
656 /* 1. Init worklist. */
657 for (i
= 0; i
< nbbs
; i
++)
660 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
666 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
667 dump_printf (MSG_NOTE
, "\n");
670 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
671 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
673 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
675 stmt
= gsi_stmt (si
);
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
679 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
680 dump_printf (MSG_NOTE
, "\n");
683 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
684 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
688 /* 2. Process_worklist */
689 while (worklist
.length () > 0)
694 stmt
= worklist
.pop ();
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
698 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
699 dump_printf (MSG_NOTE
, "\n");
702 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
703 (DEF_STMT) as relevant/irrelevant and live/dead according to the
704 liveness and relevance properties of STMT. */
705 stmt_vinfo
= vinfo_for_stmt (stmt
);
706 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
707 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
709 /* Generally, the liveness and relevance properties of STMT are
710 propagated as is to the DEF_STMTs of its USEs:
711 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
712 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
714 One exception is when STMT has been identified as defining a reduction
715 variable; in this case we set the liveness/relevance as follows:
717 relevant = vect_used_by_reduction
718 This is because we distinguish between two kinds of relevant stmts -
719 those that are used by a reduction computation, and those that are
720 (also) used by a regular computation. This allows us later on to
721 identify stmts that are used solely by a reduction, and therefore the
722 order of the results that they produce does not have to be kept. */
724 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
725 tmp_relevant
= relevant
;
728 case vect_reduction_def
:
729 switch (tmp_relevant
)
731 case vect_unused_in_scope
:
732 relevant
= vect_used_by_reduction
;
735 case vect_used_by_reduction
:
736 if (gimple_code (stmt
) == GIMPLE_PHI
)
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
743 "unsupported use of reduction.\n");
750 case vect_nested_cycle
:
751 if (tmp_relevant
!= vect_unused_in_scope
752 && tmp_relevant
!= vect_used_in_outer_by_reduction
753 && tmp_relevant
!= vect_used_in_outer
)
755 if (dump_enabled_p ())
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
757 "unsupported use of nested cycle.\n");
765 case vect_double_reduction_def
:
766 if (tmp_relevant
!= vect_unused_in_scope
767 && tmp_relevant
!= vect_used_by_reduction
)
769 if (dump_enabled_p ())
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
771 "unsupported use of double reduction.\n");
783 if (is_pattern_stmt_p (stmt_vinfo
))
785 /* Pattern statements are not inserted into the code, so
786 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
787 have to scan the RHS or function arguments instead. */
788 if (is_gimple_assign (stmt
))
790 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
791 tree op
= gimple_assign_rhs1 (stmt
);
794 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
796 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
797 live_p
, relevant
, &worklist
, false)
798 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
799 live_p
, relevant
, &worklist
, false))
803 for (; i
< gimple_num_ops (stmt
); i
++)
805 op
= gimple_op (stmt
, i
);
806 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
811 else if (is_gimple_call (stmt
))
813 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
815 tree arg
= gimple_call_arg (stmt
, i
);
816 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
823 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
825 tree op
= USE_FROM_PTR (use_p
);
826 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
831 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
834 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
836 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
840 } /* while worklist */
846 /* Function vect_model_simple_cost.
848 Models cost for simple operations, i.e. those that only emit ncopies of a
849 single op. Right now, this does not account for multiple insns that could
850 be generated for the single vector op. We will handle that shortly. */
853 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
854 enum vect_def_type
*dt
,
855 stmt_vector_for_cost
*prologue_cost_vec
,
856 stmt_vector_for_cost
*body_cost_vec
)
859 int inside_cost
= 0, prologue_cost
= 0;
861 /* The SLP costs were already calculated during SLP tree build. */
862 if (PURE_SLP_STMT (stmt_info
))
865 /* FORNOW: Assuming maximum 2 args per stmts. */
866 for (i
= 0; i
< 2; i
++)
867 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
868 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
869 stmt_info
, 0, vect_prologue
);
871 /* Pass the inside-of-loop statements to the target-specific cost model. */
872 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
873 stmt_info
, 0, vect_body
);
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_NOTE
, vect_location
,
877 "vect_model_simple_cost: inside_cost = %d, "
878 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
882 /* Model cost for type demotion and promotion operations. PWR is normally
883 zero for single-step promotions and demotions. It will be one if
884 two-step promotion/demotion is required, and so on. Each additional
885 step doubles the number of instructions required. */
888 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
889 enum vect_def_type
*dt
, int pwr
)
892 int inside_cost
= 0, prologue_cost
= 0;
893 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
894 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
895 void *target_cost_data
;
897 /* The SLP costs were already calculated during SLP tree build. */
898 if (PURE_SLP_STMT (stmt_info
))
902 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
904 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
906 for (i
= 0; i
< pwr
+ 1; i
++)
908 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
910 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
911 vec_promote_demote
, stmt_info
, 0,
915 /* FORNOW: Assuming maximum 2 args per stmts. */
916 for (i
= 0; i
< 2; i
++)
917 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
918 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
919 stmt_info
, 0, vect_prologue
);
921 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE
, vect_location
,
923 "vect_model_promotion_demotion_cost: inside_cost = %d, "
924 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
927 /* Function vect_cost_group_size
929 For grouped load or store, return the group_size only if it is the first
930 load or store of a group, else return 1. This ensures that group size is
931 only returned once per group. */
934 vect_cost_group_size (stmt_vec_info stmt_info
)
936 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
938 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
939 return GROUP_SIZE (stmt_info
);
945 /* Function vect_model_store_cost
947 Models cost for stores. In the case of grouped accesses, one access
948 has the overhead of the grouped access attributed to it. */
951 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
952 bool store_lanes_p
, enum vect_def_type dt
,
954 stmt_vector_for_cost
*prologue_cost_vec
,
955 stmt_vector_for_cost
*body_cost_vec
)
958 unsigned int inside_cost
= 0, prologue_cost
= 0;
959 struct data_reference
*first_dr
;
962 /* The SLP costs were already calculated during SLP tree build. */
963 if (PURE_SLP_STMT (stmt_info
))
966 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
967 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
968 stmt_info
, 0, vect_prologue
);
970 /* Grouped access? */
971 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
975 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
980 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
981 group_size
= vect_cost_group_size (stmt_info
);
984 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
986 /* Not a grouped access. */
990 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
993 /* We assume that the cost of a single store-lanes instruction is
994 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
995 access is instead being provided by a permute-and-store operation,
996 include the cost of the permutes. */
997 if (!store_lanes_p
&& group_size
> 1)
999 /* Uses a high and low interleave or shuffle operations for each
1001 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1002 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1003 stmt_info
, 0, vect_body
);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE
, vect_location
,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1011 /* Costs of the stores. */
1012 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE
, vect_location
,
1016 "vect_model_store_cost: inside_cost = %d, "
1017 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1021 /* Calculate cost of DR's memory access. */
1023 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1024 unsigned int *inside_cost
,
1025 stmt_vector_for_cost
*body_cost_vec
)
1027 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1028 gimple stmt
= DR_STMT (dr
);
1029 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1031 switch (alignment_support_scheme
)
1035 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1036 vector_store
, stmt_info
, 0,
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_NOTE
, vect_location
,
1041 "vect_model_store_cost: aligned.\n");
1045 case dr_unaligned_supported
:
1047 /* Here, we assign an additional cost for the unaligned store. */
1048 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1049 unaligned_store
, stmt_info
,
1050 DR_MISALIGNMENT (dr
), vect_body
);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE
, vect_location
,
1053 "vect_model_store_cost: unaligned supported by "
1058 case dr_unaligned_unsupported
:
1060 *inside_cost
= VECT_MAX_COST
;
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1064 "vect_model_store_cost: unsupported access.\n");
1074 /* Function vect_model_load_cost
1076 Models cost for loads. In the case of grouped accesses, the last access
1077 has the overhead of the grouped access attributed to it. Since unaligned
1078 accesses are supported for loads, we also account for the costs of the
1079 access scheme chosen. */
1082 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1083 bool load_lanes_p
, slp_tree slp_node
,
1084 stmt_vector_for_cost
*prologue_cost_vec
,
1085 stmt_vector_for_cost
*body_cost_vec
)
1089 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1090 unsigned int inside_cost
= 0, prologue_cost
= 0;
1092 /* The SLP costs were already calculated during SLP tree build. */
1093 if (PURE_SLP_STMT (stmt_info
))
1096 /* Grouped accesses? */
1097 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1098 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1100 group_size
= vect_cost_group_size (stmt_info
);
1101 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1103 /* Not a grouped access. */
1110 /* We assume that the cost of a single load-lanes instruction is
1111 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1112 access is instead being provided by a load-and-permute operation,
1113 include the cost of the permutes. */
1114 if (!load_lanes_p
&& group_size
> 1)
1116 /* Uses an even and odd extract operations or shuffle operations
1117 for each needed permute. */
1118 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1119 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1120 stmt_info
, 0, vect_body
);
1122 if (dump_enabled_p ())
1123 dump_printf_loc (MSG_NOTE
, vect_location
,
1124 "vect_model_load_cost: strided group_size = %d .\n",
1128 /* The loads themselves. */
1129 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1131 /* N scalar loads plus gathering them into a vector. */
1132 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1133 inside_cost
+= record_stmt_cost (body_cost_vec
,
1134 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1135 scalar_load
, stmt_info
, 0, vect_body
);
1136 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1137 stmt_info
, 0, vect_body
);
1140 vect_get_load_cost (first_dr
, ncopies
,
1141 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1142 || group_size
> 1 || slp_node
),
1143 &inside_cost
, &prologue_cost
,
1144 prologue_cost_vec
, body_cost_vec
, true);
1146 if (dump_enabled_p ())
1147 dump_printf_loc (MSG_NOTE
, vect_location
,
1148 "vect_model_load_cost: inside_cost = %d, "
1149 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1153 /* Calculate cost of DR's memory access. */
/* NOTE(review): this region of the file is extraction-damaged -- logical
   source lines are split across physical lines, original line numbers are
   fused into the text, and interior lines (braces, 'break;'s, string
   continuations) are missing, so it does not compile as-is.  Code is kept
   byte-identical below; only review comments are added.  Restore the file
   from version control before building.  */
/* Accumulates the cost of the load described by data-reference DR into
   *inside_cost (loop-body cost) and, for the realign-optimized scheme,
   *prologue_cost, dispatching on vect_supportable_dr_alignment (dr, false).
   NCOPIES scales the per-copy statement costs; ADD_REALIGN_COST and
   RECORD_PROLOGUE_COSTS gate the prologue contribution.  */
1155 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1156 bool add_realign_cost
, unsigned int *inside_cost
,
1157 unsigned int *prologue_cost
,
1158 stmt_vector_for_cost
*prologue_cost_vec
,
1159 stmt_vector_for_cost
*body_cost_vec
,
1160 bool record_prologue_costs
)
1162 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1163 gimple stmt
= DR_STMT (dr
);
1164 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* One case per alignment scheme.  NOTE(review): the 'case dr_aligned:'
   label and the closing brace/default of this switch are among the lines
   lost to the extraction -- confirm against the pristine file.  */
1166 switch (alignment_support_scheme
)
1170 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1171 stmt_info
, 0, vect_body
);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE
, vect_location
,
1175 "vect_model_load_cost: aligned.\n");
/* Unaligned but supported: one unaligned_load per copy, with the
   misalignment recorded so the target hook can price it.  */
1179 case dr_unaligned_supported
:
1181 /* Here, we assign an additional cost for the unaligned load. */
1182 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1183 unaligned_load
, stmt_info
,
1184 DR_MISALIGNMENT (dr
), vect_body
);
1186 if (dump_enabled_p ())
1187 dump_printf_loc (MSG_NOTE
, vect_location
,
1188 "vect_model_load_cost: unaligned supported by "
/* Explicit realignment: two vector loads plus a permute per copy, and
   one extra vector_stmt if the target provides builtin_mask_for_load.  */
1193 case dr_explicit_realign
:
1195 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1196 vector_load
, stmt_info
, 0, vect_body
);
1197 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1198 vec_perm
, stmt_info
, 0, vect_body
);
1200 /* FIXME: If the misalignment remains fixed across the iterations of
1201 the containing loop, the following cost should be added to the
1203 if (targetm
.vectorize
.builtin_mask_for_load
)
1204 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1205 stmt_info
, 0, vect_body
);
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE
, vect_location
,
1209 "vect_model_load_cost: explicit realign\n");
1213 case dr_explicit_realign_optimized
:
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE
, vect_location
,
1217 "vect_model_load_cost: unaligned software "
1220 /* Unaligned software pipeline has a load of an address, an initial
1221 load, and possibly a mask operation to "prime" the loop. However,
1222 if this is an access in a group of loads, which provide grouped
1223 access, then the above cost should only be considered for one
1224 access in the group. Inside the loop, there is a load op
1225 and a realignment op. */
1227 if (add_realign_cost
&& record_prologue_costs
)
1229 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1230 vector_stmt
, stmt_info
,
1232 if (targetm
.vectorize
.builtin_mask_for_load
)
1233 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1234 vector_stmt
, stmt_info
,
1238 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1239 stmt_info
, 0, vect_body
);
1240 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1241 stmt_info
, 0, vect_body
);
1243 if (dump_enabled_p ())
1244 dump_printf_loc (MSG_NOTE
, vect_location
,
1245 "vect_model_load_cost: explicit realign optimized"
/* Unsupported access: poison the cost so this path is never chosen.  */
1251 case dr_unaligned_unsupported
:
1253 *inside_cost
= VECT_MAX_COST
;
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1257 "vect_model_load_cost: unsupported access.\n");
1266 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1267 the loop preheader for the vectorized stmt STMT. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   interior lines (braces, the GSI null-check, 'else' lines) are missing.
   Code kept byte-identical; only review comments added.  */
/* Visible behavior: either forwards to vect_finish_stmt_generation (when a
   GSI is available -- the guarding condition itself is among the missing
   lines), or inserts NEW_STMT on the loop preheader edge (loop case) /
   after the labels of the BB (basic-block-vectorization case), then dumps
   the created init stmt.  */
1270 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1273 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1276 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1277 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1281 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* NOTE(review): presumably the nested-loop case picks the outer loop here;
   the lines between the check and the preheader-edge insertion are missing
   -- confirm against the pristine file.  */
1285 if (nested_in_vect_loop_p (loop
, stmt
))
1288 pe
= loop_preheader_edge (loop
);
/* Inserting on the preheader edge must not split it (assert: no new BB).  */
1289 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1290 gcc_assert (!new_bb
);
1294 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1296 gimple_stmt_iterator gsi_bb_start
;
1298 gcc_assert (bb_vinfo
);
1299 bb
= BB_VINFO_BB (bb_vinfo
);
1300 gsi_bb_start
= gsi_after_labels (bb
);
1301 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1305 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE
, vect_location
,
1308 "created new init_stmt: ");
1309 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1310 dump_printf (MSG_NOTE
, "\n");
1314 /* Function vect_init_vector.
1316 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1317 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1318 vector type a vector with all elements equal to VAL is created first.
1319 Place the initialization at BSI if it is not NULL. Otherwise, place the
1320 initialization at the loop preheader.
1321 Return the DEF of INIT_STMT.
1322 It will be used in the vectorization of STMT. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   interior lines (declarations, braces, the final 'return vec_oprnd;') are
   missing.  Code kept byte-identical; only review comments added.  */
1325 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
/* Scalar VAL fed to a vector TYPE: first coerce the scalar to TYPE's
   element type (VIEW_CONVERT for constants, an emitted NOP_EXPR assignment
   otherwise), then splat it with build_vector_from_val.  */
1332 if (TREE_CODE (type
) == VECTOR_TYPE
1333 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1335 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1337 if (CONSTANT_CLASS_P (val
))
1338 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1341 new_temp
= make_ssa_name (TREE_TYPE (type
));
1342 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1343 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1347 val
= build_vector_from_val (type
, val
);
/* Materialize VAL into a fresh "cst_" variable and emit the init stmt via
   vect_init_vector_1; the new SSA lhs is the returned def.  */
1350 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1351 init_stmt
= gimple_build_assign (new_var
, val
);
1352 new_temp
= make_ssa_name (new_var
, init_stmt
);
1353 gimple_assign_set_lhs (init_stmt
, new_temp
);
1354 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1355 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1360 /* Function vect_get_vec_def_for_operand.
1362 OP is an operand in STMT. This function returns a (vector) def that will be
1363 used in the vectorized stmt for STMT.
1365 In the case that OP is an SSA_NAME which is defined in the loop, then
1366 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1368 In case OP is an invariant or constant, a new stmt that creates a vector def
1369 needs to be introduced. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   interior lines are missing, including the 'switch (dt)' header that the
   'case vect_*_def:' labels below belong to, several 'break'/'return'
   lines, and the function's braces.  Code kept byte-identical; only review
   comments added.  */
1372 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1377 stmt_vec_info def_stmt_info
= NULL
;
1378 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1379 unsigned int nunits
;
1380 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1382 enum vect_def_type dt
;
1386 if (dump_enabled_p ())
1388 dump_printf_loc (MSG_NOTE
, vect_location
,
1389 "vect_get_vec_def_for_operand: ");
1390 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1391 dump_printf (MSG_NOTE
, "\n");
/* Classify OP; the assert documents that callers only pass operands that
   vect_is_simple_use accepts.  */
1394 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1395 &def_stmt
, &def
, &dt
);
1396 gcc_assert (is_simple_use
);
1397 if (dump_enabled_p ())
1399 int loc_printed
= 0;
1402 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1404 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1405 dump_printf (MSG_NOTE
, "\n");
1410 dump_printf (MSG_NOTE
, " def_stmt = ");
1412 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1414 dump_printf (MSG_NOTE
, "\n");
1420 /* Case 1: operand is a constant. */
1421 case vect_constant_def
:
1423 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1424 gcc_assert (vector_type
);
1425 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1430 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1431 if (dump_enabled_p ())
1432 dump_printf_loc (MSG_NOTE
, vect_location
,
1433 "Create vector_cst. nunits = %d\n", nunits
);
1435 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1438 /* Case 2: operand is defined outside the loop - loop invariant. */
1439 case vect_external_def
:
1441 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1442 gcc_assert (vector_type
);
1447 /* Create 'vec_inv = {inv,inv,..,inv}' */
1448 if (dump_enabled_p ())
1449 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1451 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1454 /* Case 3: operand is defined inside the loop. */
1455 case vect_internal_def
:
1458 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1460 /* Get the def from the vectorized stmt. */
1461 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1463 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1464 /* Get vectorized pattern statement. */
/* NOTE(review): the first conjunct of this condition (presumably a check
   that vec_stmt is still NULL) is among the missing lines.  */
1466 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1467 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1468 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1469 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
/* Extract the vector def from whichever stmt form defines it: PHI result,
   call lhs, or assignment lhs.  */
1470 gcc_assert (vec_stmt
);
1471 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1472 vec_oprnd
= PHI_RESULT (vec_stmt
);
1473 else if (is_gimple_call (vec_stmt
))
1474 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1476 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1480 /* Case 4: operand is defined by a loop header phi - reduction */
1481 case vect_reduction_def
:
1482 case vect_double_reduction_def
:
1483 case vect_nested_cycle
:
1487 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1488 loop
= (gimple_bb (def_stmt
))->loop_father
;
1490 /* Get the def before the loop */
1491 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1492 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1495 /* Case 5: operand is defined by loop-header phi - induction. */
1496 case vect_induction_def
:
1498 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1500 /* Get the def from the vectorized stmt. */
1501 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1502 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1503 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1504 vec_oprnd
= PHI_RESULT (vec_stmt
);
1506 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1516 /* Function vect_get_vec_def_for_stmt_copy
1518 Return a vector-def for an operand. This function is used when the
1519 vectorized stmt to be created (by the caller to this function) is a "copy"
1520 created in case the vectorized result cannot fit in one vector, and several
1521 copies of the vector-stmt are required. In this case the vector-def is
1522 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1523 of the stmt that defines VEC_OPRND.
1524 DT is the type of the vector def VEC_OPRND.
1527 In case the vectorization factor (VF) is bigger than the number
1528 of elements that can fit in a vectype (nunits), we have to generate
1529 more than one vector stmt to vectorize the scalar stmt. This situation
1530 arises when there are multiple data-types operated upon in the loop; the
1531 smallest data-type determines the VF, and as a result, when vectorizing
1532 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1533 vector stmt (each computing a vector of 'nunits' results, and together
1534 computing 'VF' results in each iteration). This function is called when
1535 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1536 which VF=16 and nunits=4, so the number of copies required is 4):
1538 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1540 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1541 VS1.1: vx.1 = memref1 VS1.2
1542 VS1.2: vx.2 = memref2 VS1.3
1543 VS1.3: vx.3 = memref3
1545 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1546 VSnew.1: vz1 = vx.1 + ... VSnew.2
1547 VSnew.2: vz2 = vx.2 + ... VSnew.3
1548 VSnew.3: vz3 = vx.3 + ...
1550 The vectorization of S1 is explained in vectorizable_load.
1551 The vectorization of S2:
1552 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1553 the function 'vect_get_vec_def_for_operand' is called to
1554 get the relevant vector-def for each operand of S2. For operand x it
1555 returns the vector-def 'vx.0'.
1557 To create the remaining copies of the vector-stmt (VSnew.j), this
1558 function is called to get the relevant vector-def for each operand. It is
1559 obtained from the respective VS1.j stmt, which is recorded in the
1560 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1562 For example, to obtain the vector-def 'vx.1' in order to create the
1563 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1564 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1565 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1566 and return its def ('vx.1').
1567 Overall, to create the above sequence this function will be called 3 times:
1568 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1569 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1570 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   interior lines (function braces, the early 'return vec_oprnd;' for the
   invariant case, 'else') are missing.  Code kept byte-identical; only
   review comments added.  */
1573 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1575 gimple vec_stmt_for_operand
;
1576 stmt_vec_info def_stmt_info
;
1578 /* Do nothing; can reuse same def. */
1579 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Walk VEC_OPRND's defining stmt to its RELATED_STMT (the next copy) and
   return that copy's def -- PHI result or lhs as appropriate.  */
1582 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1583 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1584 gcc_assert (def_stmt_info
);
1585 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1586 gcc_assert (vec_stmt_for_operand
);
1587 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1588 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1589 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1591 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1596 /* Get vectorized definitions for the operands to create a copy of an original
1597 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   the function's braces are missing.  Code kept byte-identical; only
   review comments added.  */
/* Pops the tail of each operand vector, advances it to the next-copy def
   via vect_get_vec_def_for_stmt_copy, and pushes it back; the second
   operand vector is optional (may be NULL or empty).  */
1600 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1601 vec
<tree
> *vec_oprnds0
,
1602 vec
<tree
> *vec_oprnds1
)
1604 tree vec_oprnd
= vec_oprnds0
->pop ();
1606 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1607 vec_oprnds0
->quick_push (vec_oprnd
);
1609 if (vec_oprnds1
&& vec_oprnds1
->length ())
1611 vec_oprnd
= vec_oprnds1
->pop ();
1612 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1613 vec_oprnds1
->quick_push (vec_oprnd
);
1618 /* Get vectorized definitions for OP0 and OP1.
1619 REDUC_INDEX is the index of reduction operand in case of reduction,
1620 and -1 otherwise. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   the if/else skeleton (the SLP-vs-non-SLP branch, 'if (op1)' guards,
   braces) is missing.  Code kept byte-identical; only review comments
   added.  */
/* SLP path (visible via vect_get_slp_defs): batch OP0/OP1 into 'ops' and
   let the SLP machinery fill vec_defs.  Non-SLP path: one def per operand
   via vect_get_vec_def_for_operand.  */
1623 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1624 vec
<tree
> *vec_oprnds0
,
1625 vec
<tree
> *vec_oprnds1
,
1626 slp_tree slp_node
, int reduc_index
)
1630 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1631 auto_vec
<tree
> ops (nops
);
1632 auto_vec
<vec
<tree
> > vec_defs (nops
);
1634 ops
.quick_push (op0
);
1636 ops
.quick_push (op1
);
1638 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1640 *vec_oprnds0
= vec_defs
[0];
1642 *vec_oprnds1
= vec_defs
[1];
1648 vec_oprnds0
->create (1);
1649 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1650 vec_oprnds0
->quick_push (vec_oprnd
);
1654 vec_oprnds1
->create (1);
1655 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1656 vec_oprnds1
->quick_push (vec_oprnd
);
1662 /* Function vect_finish_stmt_generation.
1664 Insert a new stmt. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   interior lines (braces, the trailing bb_vinfo argument of the
   new_stmt_vec_info call) are missing.  Code kept byte-identical; only
   review comments added.  */
/* Inserts VEC_STMT before *GSI, registers stmt_vec_info for it, copies
   STMT's location, patches virtual SSA operands in place when VEC_STMT is
   a store inserted at a point with a live vuse, and re-attaches STMT's EH
   landing pad to VEC_STMT if it can throw.  */
1667 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1668 gimple_stmt_iterator
*gsi
)
1670 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1671 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1672 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1674 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1676 if (!gsi_end_p (*gsi
)
1677 && gimple_has_mem_ops (vec_stmt
))
1679 gimple at_stmt
= gsi_stmt (*gsi
);
1680 tree vuse
= gimple_vuse (at_stmt
);
1681 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1683 tree vdef
= gimple_vdef (at_stmt
);
1684 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1685 /* If we have an SSA vuse and insert a store, update virtual
1686 SSA form to avoid triggering the renamer. Do so only
1687 if we can easily see all uses - which is what almost always
1688 happens with the way vectorized stmts are inserted. */
1689 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1690 && ((is_gimple_assign (vec_stmt
)
1691 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1692 || (is_gimple_call (vec_stmt
)
1693 && !(gimple_call_flags (vec_stmt
)
1694 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1696 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1697 gimple_set_vdef (vec_stmt
, new_vdef
);
1698 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1702 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1704 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1707 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1710 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1711 dump_printf (MSG_NOTE
, "\n");
1714 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1716 /* While EH edges will generally prevent vectorization, stmt might
1717 e.g. be in a must-not-throw region. Ensure newly created stmts
1718 that could throw are part of the same region. */
1719 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1720 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1721 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1724 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1725 a function declaration if the target has a vectorized version
1726 of the function, or NULL_TREE if the function cannot be vectorized. */
/* NOTE(review): extraction-damaged region -- logical lines are split and
   interior lines ('return NULL_TREE;'s, the 'if (fndecl == NULL_TREE'
   opener whose continuation is visible at the '|| TREE_CODE' lines, the
   trailing vectype_in argument of the target-hook call, braces) are
   missing.  Code kept byte-identical; only review comments added.  */
1729 vectorizable_function (gcall
*call
, tree vectype_out
, tree vectype_in
)
1731 tree fndecl
= gimple_call_fndecl (call
);
1733 /* We only handle functions that do not read or clobber memory -- i.e.
1734 const or novops ones. */
1735 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1739 || TREE_CODE (fndecl
) != FUNCTION_DECL
1740 || !DECL_BUILT_IN (fndecl
))
/* Delegate the actual lookup to the target hook.  */
1743 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
/* Forward declaration: defined later in the file (outside this chunk).  */
1748 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1749 gimple_stmt_iterator
*);
1752 /* Function vectorizable_mask_load_store.
1754 Check if STMT performs a conditional load or store that can be vectorized.
1755 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1756 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1757 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction-damaged region -- logical lines are split across
   physical lines and many interior lines are missing (function braces,
   'return false;'s, 'else' lines, several call arguments, loop braces).
   Code kept byte-identical; only review comments added.  Structure visible
   below: (1) analysis/legality checks, (2) costing when !vec_stmt,
   (3) transform: a gather path, a masked-store path, and a masked-load
   path.  Restore the file from version control before building.  */
1760 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1761 gimple
*vec_stmt
, slp_tree slp_node
)
1763 tree vec_dest
= NULL
;
1764 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1765 stmt_vec_info prev_stmt_info
;
1766 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1767 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1768 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1769 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1770 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1774 tree dataref_ptr
= NULL_TREE
;
1776 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1780 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1781 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1782 int gather_scale
= 1;
1783 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1788 enum vect_def_type dt
;
/* SLP of masked loads/stores is not handled here (early bail).  */
1790 if (slp_node
!= NULL
)
1793 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1794 gcc_assert (ncopies
>= 1);
1796 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1797 mask
= gimple_call_arg (stmt
, 2);
/* The mask's element precision must match the value vector's element
   mode width.  */
1798 if (TYPE_PRECISION (TREE_TYPE (mask
))
1799 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1802 /* FORNOW. This restriction should be relaxed. */
1803 if (nested_in_vect_loop
&& ncopies
> 1)
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1807 "multiple types in nested loop.");
1811 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1814 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1817 if (!STMT_VINFO_DATA_REF (stmt_info
))
1820 elem_type
= TREE_TYPE (vectype
);
1822 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1825 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
/* Gather legality: check the target gather decl and that the offset is a
   simple use with a known vector type.  */
1828 if (STMT_VINFO_GATHER_P (stmt_info
))
1832 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1833 &gather_off
, &gather_scale
);
1834 gcc_assert (gather_decl
);
1835 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1836 &def_stmt
, &def
, &gather_dt
,
1837 &gather_off_vectype
))
1839 if (dump_enabled_p ())
1840 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1841 "gather index use not simple.");
1845 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1847 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1848 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1850 if (dump_enabled_p ())
1851 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1852 "masked gather with integer mask not supported.");
/* Non-gather: reject non-positive DR step and targets without a masked
   load/store optab for this mode.  */
1856 else if (tree_int_cst_compare (nested_in_vect_loop
1857 ? STMT_VINFO_DR_STEP (stmt_info
)
1858 : DR_STEP (dr
), size_zero_node
) <= 0)
1860 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1861 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1864 if (TREE_CODE (mask
) != SSA_NAME
)
1867 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1868 &def_stmt
, &def
, &dt
))
1873 tree rhs
= gimple_call_arg (stmt
, 3);
1874 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1875 &def_stmt
, &def
, &dt
))
/* Analysis-only invocation: record the stmt kind and its cost, no code
   generation.  */
1879 if (!vec_stmt
) /* transformation not required. */
1881 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1883 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1886 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
/* Transform, path 1: masked gather.  Builds calls to the target gather
   builtin, with widen/narrow handling when the offset vector has a
   different element count than VECTYPE.  */
1892 if (STMT_VINFO_GATHER_P (stmt_info
))
1894 tree vec_oprnd0
= NULL_TREE
, op
;
1895 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1896 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1897 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1898 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1899 tree mask_perm_mask
= NULL_TREE
;
1900 edge pe
= loop_preheader_edge (loop
);
1903 enum { NARROW
, NONE
, WIDEN
} modifier
;
1904 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
/* Decompose the gather builtin's prototype: (src, ptr, idx, mask, scale).  */
1906 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1907 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1908 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1909 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1910 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1911 scaletype
= TREE_VALUE (arglist
);
1912 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1913 && types_compatible_p (srctype
, masktype
))
/* Pick NONE/WIDEN/NARROW and precompute the needed permutation masks.  */
1915 if (nunits
== gather_off_nunits
)
1917 else if (nunits
== gather_off_nunits
/ 2)
1919 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1922 for (i
= 0; i
< gather_off_nunits
; ++i
)
1923 sel
[i
] = i
| nunits
;
1925 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1927 else if (nunits
== gather_off_nunits
* 2)
1929 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1932 for (i
= 0; i
< nunits
; ++i
)
1933 sel
[i
] = i
< gather_off_nunits
1934 ? i
: i
+ nunits
- gather_off_nunits
;
1936 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1938 for (i
= 0; i
< nunits
; ++i
)
1939 sel
[i
] = i
| gather_off_nunits
;
1940 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1945 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
/* Force the gather base address into a gimple value on the preheader
   edge if it is not invariant.  */
1947 ptr
= fold_convert (ptrtype
, gather_base
);
1948 if (!is_gimple_min_invariant (ptr
))
1950 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1951 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1952 gcc_assert (!new_bb
);
1955 scale
= build_int_cst (scaletype
, gather_scale
);
1957 prev_stmt_info
= NULL
;
/* Main gather emission loop: one gather call per copy, permuting the
   offset/mask vectors on odd iterations in the WIDEN case.  */
1958 for (j
= 0; j
< ncopies
; ++j
)
1960 if (modifier
== WIDEN
&& (j
& 1))
1961 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1962 perm_mask
, stmt
, gsi
);
1965 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1968 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
/* Reconcile the offset vector's type with the builtin's index type via
   a VIEW_CONVERT if they differ only in representation.  */
1970 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1972 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1973 == TYPE_VECTOR_SUBPARTS (idxtype
));
1974 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1975 var
= make_ssa_name (var
);
1976 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1978 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1979 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1983 if (mask_perm_mask
&& (j
& 1))
1984 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1985 mask_perm_mask
, stmt
, gsi
);
1989 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
1992 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
1993 &def_stmt
, &def
, &dt
);
1994 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
/* Same VIEW_CONVERT reconciliation for the mask operand.  */
1998 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2000 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2001 == TYPE_VECTOR_SUBPARTS (masktype
));
2002 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
2004 var
= make_ssa_name (var
);
2005 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2007 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2008 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* The actual gather: mask_op doubles as the pass-through source.
   NOTE(review): the 'scale' argument line of this call is among the
   missing lines.  */
2014 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
2017 if (!useless_type_conversion_p (vectype
, rettype
))
2019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2020 == TYPE_VECTOR_SUBPARTS (rettype
));
2021 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2022 op
= make_ssa_name (var
, new_stmt
);
2023 gimple_call_set_lhs (new_stmt
, op
);
2024 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2025 var
= make_ssa_name (vec_dest
);
2026 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2027 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2031 var
= make_ssa_name (vec_dest
, new_stmt
);
2032 gimple_call_set_lhs (new_stmt
, var
);
2035 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NARROW: merge two half-width results with a permute on odd copies.  */
2037 if (modifier
== NARROW
)
2044 var
= permute_vec_elements (prev_res
, var
,
2045 perm_mask
, stmt
, gsi
);
2046 new_stmt
= SSA_NAME_DEF_STMT (var
);
/* Chain the generated copies via STMT_VINFO_RELATED_STMT.  */
2049 if (prev_stmt_info
== NULL
)
2050 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2052 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2053 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2056 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2058 tree lhs
= gimple_call_lhs (stmt
);
2059 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2060 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2061 set_vinfo_for_stmt (stmt
, NULL
);
2062 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2063 gsi_replace (gsi
, new_stmt
, true);
/* Transform, path 2: masked store -- one IFN_MASK_STORE per copy with a
   bumped data-ref pointer and alignment info on the pointer.  */
2068 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2069 prev_stmt_info
= NULL
;
2070 for (i
= 0; i
< ncopies
; i
++)
2072 unsigned align
, misalign
;
2076 tree rhs
= gimple_call_arg (stmt
, 3);
2077 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2078 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2079 /* We should have catched mismatched types earlier. */
2080 gcc_assert (useless_type_conversion_p (vectype
,
2081 TREE_TYPE (vec_rhs
)));
2082 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2083 NULL_TREE
, &dummy
, gsi
,
2084 &ptr_incr
, false, &inv_p
);
2085 gcc_assert (!inv_p
);
/* Copies after the first: advance rhs/mask defs and bump the pointer
   by one vector.  */
2089 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2091 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2092 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2094 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2095 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2096 TYPE_SIZE_UNIT (vectype
));
/* Record pointer alignment: full vector alignment when the DR is
   aligned, element alignment when misalignment is unknown (-1).  */
2099 align
= TYPE_ALIGN_UNIT (vectype
);
2100 if (aligned_access_p (dr
))
2102 else if (DR_MISALIGNMENT (dr
) == -1)
2104 align
= TYPE_ALIGN_UNIT (elem_type
);
2108 misalign
= DR_MISALIGNMENT (dr
);
2109 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2112 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2113 gimple_call_arg (stmt
, 1),
2115 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2117 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2119 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2120 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Transform, path 3: masked load -- one IFN_MASK_LOAD per copy, mirroring
   the store path's pointer/alignment handling.  */
2125 tree vec_mask
= NULL_TREE
;
2126 prev_stmt_info
= NULL
;
2127 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2128 for (i
= 0; i
< ncopies
; i
++)
2130 unsigned align
, misalign
;
2134 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2135 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2136 NULL_TREE
, &dummy
, gsi
,
2137 &ptr_incr
, false, &inv_p
);
2138 gcc_assert (!inv_p
);
2142 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2144 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2145 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2146 TYPE_SIZE_UNIT (vectype
));
2149 align
= TYPE_ALIGN_UNIT (vectype
);
2150 if (aligned_access_p (dr
))
2152 else if (DR_MISALIGNMENT (dr
) == -1)
2154 align
= TYPE_ALIGN_UNIT (elem_type
);
2158 misalign
= DR_MISALIGNMENT (dr
);
2159 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2162 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2163 gimple_call_arg (stmt
, 1),
2165 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
))
2166 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2168 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2170 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2171 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2177 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2179 tree lhs
= gimple_call_lhs (stmt
);
2180 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2181 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2182 set_vinfo_for_stmt (stmt
, NULL
);
2183 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2184 gsi_replace (gsi
, new_stmt
, true);
2191 /* Function vectorizable_call.
2193 Check if GS performs a function call that can be vectorized.
2194 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2195 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2196 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2199 vectorizable_call (gimple gs
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2206 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2207 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2208 tree vectype_out
, vectype_in
;
2211 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2212 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2213 tree fndecl
, new_temp
, def
, rhs_type
;
2215 enum vect_def_type dt
[3]
2216 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2217 gimple new_stmt
= NULL
;
2219 vec
<tree
> vargs
= vNULL
;
2220 enum { NARROW
, NONE
, WIDEN
} modifier
;
2224 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2227 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2230 /* Is GS a vectorizable call? */
2231 stmt
= dyn_cast
<gcall
*> (gs
);
2235 if (gimple_call_internal_p (stmt
)
2236 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2237 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2238 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2241 if (gimple_call_lhs (stmt
) == NULL_TREE
2242 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2245 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2247 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2249 /* Process function arguments. */
2250 rhs_type
= NULL_TREE
;
2251 vectype_in
= NULL_TREE
;
2252 nargs
= gimple_call_num_args (stmt
);
2254 /* Bail out if the function has more than three arguments, we do not have
2255 interesting builtin functions to vectorize with more than two arguments
2256 except for fma. No arguments is also not good. */
2257 if (nargs
== 0 || nargs
> 3)
2260 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2261 if (gimple_call_internal_p (stmt
)
2262 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2265 rhs_type
= unsigned_type_node
;
2268 for (i
= 0; i
< nargs
; i
++)
2272 op
= gimple_call_arg (stmt
, i
);
2274 /* We can only handle calls with arguments of the same type. */
2276 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2280 "argument types differ.\n");
2284 rhs_type
= TREE_TYPE (op
);
2286 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2287 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2291 "use not simple.\n");
2296 vectype_in
= opvectype
;
2298 && opvectype
!= vectype_in
)
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2302 "argument vector types differ.\n");
2306 /* If all arguments are external or constant defs use a vector type with
2307 the same size as the output vector type. */
2309 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2311 gcc_assert (vectype_in
);
2314 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2317 "no vectype for scalar type ");
2318 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2319 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2326 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2327 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2328 if (nunits_in
== nunits_out
/ 2)
2330 else if (nunits_out
== nunits_in
)
2332 else if (nunits_out
== nunits_in
/ 2)
2337 /* For now, we only vectorize functions if a target specific builtin
2338 is available. TODO -- in some cases, it might be profitable to
2339 insert the calls for pieces of the vector, in order to be able
2340 to vectorize other operations in the loop. */
2341 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2342 if (fndecl
== NULL_TREE
)
2344 if (gimple_call_internal_p (stmt
)
2345 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2348 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2349 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2350 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2351 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2353 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2354 { 0, 1, 2, ... vf - 1 } vector. */
2355 gcc_assert (nargs
== 0);
2359 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2361 "function is not vectorizable.\n");
2366 gcc_assert (!gimple_vuse (stmt
));
2368 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2370 else if (modifier
== NARROW
)
2371 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2373 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2375 /* Sanity check: make sure that at least one copy of the vectorized stmt
2376 needs to be generated. */
2377 gcc_assert (ncopies
>= 1);
2379 if (!vec_stmt
) /* transformation not required. */
2381 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2385 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2395 scalar_dest
= gimple_call_lhs (stmt
);
2396 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2398 prev_stmt_info
= NULL
;
2402 for (j
= 0; j
< ncopies
; ++j
)
2404 /* Build argument list for the vectorized call. */
2406 vargs
.create (nargs
);
2412 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2413 vec
<tree
> vec_oprnds0
;
2415 for (i
= 0; i
< nargs
; i
++)
2416 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2417 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2418 vec_oprnds0
= vec_defs
[0];
2420 /* Arguments are ready. Create the new vector stmt. */
2421 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2424 for (k
= 0; k
< nargs
; k
++)
2426 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2427 vargs
[k
] = vec_oprndsk
[i
];
2429 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2430 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2431 gimple_call_set_lhs (new_stmt
, new_temp
);
2432 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2433 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2436 for (i
= 0; i
< nargs
; i
++)
2438 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2439 vec_oprndsi
.release ();
2444 for (i
= 0; i
< nargs
; i
++)
2446 op
= gimple_call_arg (stmt
, i
);
2449 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2452 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2454 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2457 vargs
.quick_push (vec_oprnd0
);
2460 if (gimple_call_internal_p (stmt
)
2461 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2463 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2465 for (k
= 0; k
< nunits_out
; ++k
)
2466 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2467 tree cst
= build_vector (vectype_out
, v
);
2469 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2470 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2471 new_temp
= make_ssa_name (new_var
, init_stmt
);
2472 gimple_assign_set_lhs (init_stmt
, new_temp
);
2473 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2474 new_temp
= make_ssa_name (vec_dest
);
2475 new_stmt
= gimple_build_assign (new_temp
,
2476 gimple_assign_lhs (init_stmt
));
2480 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2481 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2482 gimple_call_set_lhs (new_stmt
, new_temp
);
2484 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2487 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2489 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2491 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2497 for (j
= 0; j
< ncopies
; ++j
)
2499 /* Build argument list for the vectorized call. */
2501 vargs
.create (nargs
* 2);
2507 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2508 vec
<tree
> vec_oprnds0
;
2510 for (i
= 0; i
< nargs
; i
++)
2511 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2512 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2513 vec_oprnds0
= vec_defs
[0];
2515 /* Arguments are ready. Create the new vector stmt. */
2516 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2520 for (k
= 0; k
< nargs
; k
++)
2522 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2523 vargs
.quick_push (vec_oprndsk
[i
]);
2524 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2526 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2527 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2528 gimple_call_set_lhs (new_stmt
, new_temp
);
2529 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2530 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2533 for (i
= 0; i
< nargs
; i
++)
2535 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2536 vec_oprndsi
.release ();
2541 for (i
= 0; i
< nargs
; i
++)
2543 op
= gimple_call_arg (stmt
, i
);
2547 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2549 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2553 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2555 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2557 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2560 vargs
.quick_push (vec_oprnd0
);
2561 vargs
.quick_push (vec_oprnd1
);
2564 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2565 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2566 gimple_call_set_lhs (new_stmt
, new_temp
);
2567 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2570 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2574 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2577 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2582 /* No current target implements this case. */
2588 /* The call in STMT might prevent it from being removed in dce.
2589 We however cannot remove it here, due to the way the ssa name
2590 it defines is mapped to the new definition. So just replace
2591 rhs of the statement with something harmless. */
2596 type
= TREE_TYPE (scalar_dest
);
2597 if (is_pattern_stmt_p (stmt_info
))
2598 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2600 lhs
= gimple_call_lhs (stmt
);
2601 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2602 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2603 set_vinfo_for_stmt (stmt
, NULL
);
2604 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2605 gsi_replace (gsi
, new_stmt
, false);
/* Per-call-argument analysis record used by vectorizable_simd_clone_call.
   NOTE(review): the extraction of this chunk dropped several members of
   this struct (original lines 2612-2614 and 2617-2619, including its
   closing brace) -- confirm the full definition against the complete
   file before relying on it.  */
2611 struct simd_call_arg_info
/* How this argument is defined (constant, external, internal def...);
   filled in from vect_is_simple_use_1 by the caller.  */
2615 enum vect_def_type dt
;
/* Step of a linear argument; 0 when the argument is not linear.  */
2616 HOST_WIDE_INT linear_step
;
2620 /* Function vectorizable_simd_clone_call.
2622 Check if STMT performs a function call that can be vectorized
2623 by calling a simd clone of the function.
2624 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2625 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2626 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2629 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2630 gimple
*vec_stmt
, slp_tree slp_node
)
2635 tree vec_oprnd0
= NULL_TREE
;
2636 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2638 unsigned int nunits
;
2639 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2640 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2641 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2642 tree fndecl
, new_temp
, def
;
2644 gimple new_stmt
= NULL
;
2646 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2647 vec
<tree
> vargs
= vNULL
;
2649 tree lhs
, rtype
, ratype
;
2650 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2652 /* Is STMT a vectorizable call? */
2653 if (!is_gimple_call (stmt
))
2656 fndecl
= gimple_call_fndecl (stmt
);
2657 if (fndecl
== NULL_TREE
)
2660 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2661 if (node
== NULL
|| node
->simd_clones
== NULL
)
2664 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2667 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2670 if (gimple_call_lhs (stmt
)
2671 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2674 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2676 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2678 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2682 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2685 /* Process function arguments. */
2686 nargs
= gimple_call_num_args (stmt
);
2688 /* Bail out if the function has zero arguments. */
2692 arginfo
.create (nargs
);
2694 for (i
= 0; i
< nargs
; i
++)
2696 simd_call_arg_info thisarginfo
;
2699 thisarginfo
.linear_step
= 0;
2700 thisarginfo
.align
= 0;
2701 thisarginfo
.op
= NULL_TREE
;
2703 op
= gimple_call_arg (stmt
, i
);
2704 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2705 &def_stmt
, &def
, &thisarginfo
.dt
,
2706 &thisarginfo
.vectype
)
2707 || thisarginfo
.dt
== vect_uninitialized_def
)
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2711 "use not simple.\n");
2716 if (thisarginfo
.dt
== vect_constant_def
2717 || thisarginfo
.dt
== vect_external_def
)
2718 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2720 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2722 /* For linear arguments, the analyze phase should have saved
2723 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2724 if (i
* 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2725 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2])
2727 gcc_assert (vec_stmt
);
2728 thisarginfo
.linear_step
2729 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2]);
2731 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 1];
2732 /* If loop has been peeled for alignment, we need to adjust it. */
2733 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2734 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2737 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2738 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2];
2739 tree opt
= TREE_TYPE (thisarginfo
.op
);
2740 bias
= fold_convert (TREE_TYPE (step
), bias
);
2741 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2743 = fold_build2 (POINTER_TYPE_P (opt
)
2744 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2745 thisarginfo
.op
, bias
);
2749 && thisarginfo
.dt
!= vect_constant_def
2750 && thisarginfo
.dt
!= vect_external_def
2752 && TREE_CODE (op
) == SSA_NAME
2753 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2755 && tree_fits_shwi_p (iv
.step
))
2757 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2758 thisarginfo
.op
= iv
.base
;
2760 else if ((thisarginfo
.dt
== vect_constant_def
2761 || thisarginfo
.dt
== vect_external_def
)
2762 && POINTER_TYPE_P (TREE_TYPE (op
)))
2763 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2765 arginfo
.quick_push (thisarginfo
);
2768 unsigned int badness
= 0;
2769 struct cgraph_node
*bestn
= NULL
;
2770 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2771 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2773 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2774 n
= n
->simdclone
->next_clone
)
2776 unsigned int this_badness
= 0;
2777 if (n
->simdclone
->simdlen
2778 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2779 || n
->simdclone
->nargs
!= nargs
)
2781 if (n
->simdclone
->simdlen
2782 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2783 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2784 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2785 if (n
->simdclone
->inbranch
)
2786 this_badness
+= 2048;
2787 int target_badness
= targetm
.simd_clone
.usable (n
);
2788 if (target_badness
< 0)
2790 this_badness
+= target_badness
* 512;
2791 /* FORNOW: Have to add code to add the mask argument. */
2792 if (n
->simdclone
->inbranch
)
2794 for (i
= 0; i
< nargs
; i
++)
2796 switch (n
->simdclone
->args
[i
].arg_type
)
2798 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2799 if (!useless_type_conversion_p
2800 (n
->simdclone
->args
[i
].orig_type
,
2801 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2803 else if (arginfo
[i
].dt
== vect_constant_def
2804 || arginfo
[i
].dt
== vect_external_def
2805 || arginfo
[i
].linear_step
)
2808 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2809 if (arginfo
[i
].dt
!= vect_constant_def
2810 && arginfo
[i
].dt
!= vect_external_def
)
2813 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2814 if (arginfo
[i
].dt
== vect_constant_def
2815 || arginfo
[i
].dt
== vect_external_def
2816 || (arginfo
[i
].linear_step
2817 != n
->simdclone
->args
[i
].linear_step
))
2820 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2824 case SIMD_CLONE_ARG_TYPE_MASK
:
2827 if (i
== (size_t) -1)
2829 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2834 if (arginfo
[i
].align
)
2835 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2836 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2838 if (i
== (size_t) -1)
2840 if (bestn
== NULL
|| this_badness
< badness
)
2843 badness
= this_badness
;
2853 for (i
= 0; i
< nargs
; i
++)
2854 if ((arginfo
[i
].dt
== vect_constant_def
2855 || arginfo
[i
].dt
== vect_external_def
)
2856 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2859 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2861 if (arginfo
[i
].vectype
== NULL
2862 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2863 > bestn
->simdclone
->simdlen
))
2870 fndecl
= bestn
->decl
;
2871 nunits
= bestn
->simdclone
->simdlen
;
2872 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2874 /* If the function isn't const, only allow it in simd loops where user
2875 has asserted that at least nunits consecutive iterations can be
2876 performed using SIMD instructions. */
2877 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2878 && gimple_vuse (stmt
))
2884 /* Sanity check: make sure that at least one copy of the vectorized stmt
2885 needs to be generated. */
2886 gcc_assert (ncopies
>= 1);
2888 if (!vec_stmt
) /* transformation not required. */
2890 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2891 for (i
= 0; i
< nargs
; i
++)
2892 if (bestn
->simdclone
->args
[i
].arg_type
2893 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 2
2897 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2898 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2899 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2900 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2901 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2903 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_NOTE
, vect_location
,
2906 "=== vectorizable_simd_clone_call ===\n");
2907 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2914 if (dump_enabled_p ())
2915 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2918 scalar_dest
= gimple_call_lhs (stmt
);
2919 vec_dest
= NULL_TREE
;
2924 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2925 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2926 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2929 rtype
= TREE_TYPE (ratype
);
2933 prev_stmt_info
= NULL
;
2934 for (j
= 0; j
< ncopies
; ++j
)
2936 /* Build argument list for the vectorized call. */
2938 vargs
.create (nargs
);
2942 for (i
= 0; i
< nargs
; i
++)
2944 unsigned int k
, l
, m
, o
;
2946 op
= gimple_call_arg (stmt
, i
);
2947 switch (bestn
->simdclone
->args
[i
].arg_type
)
2949 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2950 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2951 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2952 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2954 if (TYPE_VECTOR_SUBPARTS (atype
)
2955 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2957 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2958 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2959 / TYPE_VECTOR_SUBPARTS (atype
));
2960 gcc_assert ((k
& (k
- 1)) == 0);
2963 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2966 vec_oprnd0
= arginfo
[i
].op
;
2967 if ((m
& (k
- 1)) == 0)
2969 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2972 arginfo
[i
].op
= vec_oprnd0
;
2974 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2976 bitsize_int ((m
& (k
- 1)) * prec
));
2978 = gimple_build_assign (make_ssa_name (atype
),
2980 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2981 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2985 k
= (TYPE_VECTOR_SUBPARTS (atype
)
2986 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
2987 gcc_assert ((k
& (k
- 1)) == 0);
2988 vec
<constructor_elt
, va_gc
> *ctor_elts
;
2990 vec_alloc (ctor_elts
, k
);
2993 for (l
= 0; l
< k
; l
++)
2995 if (m
== 0 && l
== 0)
2997 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3000 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3002 arginfo
[i
].op
= vec_oprnd0
;
3005 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3009 vargs
.safe_push (vec_oprnd0
);
3012 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3014 = gimple_build_assign (make_ssa_name (atype
),
3016 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3017 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3022 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3023 vargs
.safe_push (op
);
3025 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3030 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3035 edge pe
= loop_preheader_edge (loop
);
3036 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3037 gcc_assert (!new_bb
);
3039 tree phi_res
= copy_ssa_name (op
);
3040 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3041 set_vinfo_for_stmt (new_phi
,
3042 new_stmt_vec_info (new_phi
, loop_vinfo
,
3044 add_phi_arg (new_phi
, arginfo
[i
].op
,
3045 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3047 = POINTER_TYPE_P (TREE_TYPE (op
))
3048 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3049 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3050 ? sizetype
: TREE_TYPE (op
);
3052 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3054 tree tcst
= wide_int_to_tree (type
, cst
);
3055 tree phi_arg
= copy_ssa_name (op
);
3057 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3058 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3059 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3060 set_vinfo_for_stmt (new_stmt
,
3061 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3063 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3065 arginfo
[i
].op
= phi_res
;
3066 vargs
.safe_push (phi_res
);
3071 = POINTER_TYPE_P (TREE_TYPE (op
))
3072 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3073 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3074 ? sizetype
: TREE_TYPE (op
);
3076 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3078 tree tcst
= wide_int_to_tree (type
, cst
);
3079 new_temp
= make_ssa_name (TREE_TYPE (op
));
3080 new_stmt
= gimple_build_assign (new_temp
, code
,
3081 arginfo
[i
].op
, tcst
);
3082 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3083 vargs
.safe_push (new_temp
);
3086 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3092 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3095 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3097 new_temp
= create_tmp_var (ratype
);
3098 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3099 == TYPE_VECTOR_SUBPARTS (rtype
))
3100 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3102 new_temp
= make_ssa_name (rtype
, new_stmt
);
3103 gimple_call_set_lhs (new_stmt
, new_temp
);
3105 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3109 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3112 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3113 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3114 gcc_assert ((k
& (k
- 1)) == 0);
3115 for (l
= 0; l
< k
; l
++)
3120 t
= build_fold_addr_expr (new_temp
);
3121 t
= build2 (MEM_REF
, vectype
, t
,
3122 build_int_cst (TREE_TYPE (t
),
3123 l
* prec
/ BITS_PER_UNIT
));
3126 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3127 size_int (prec
), bitsize_int (l
* prec
));
3129 = gimple_build_assign (make_ssa_name (vectype
), t
);
3130 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3131 if (j
== 0 && l
== 0)
3132 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3134 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3136 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3141 tree clobber
= build_constructor (ratype
, NULL
);
3142 TREE_THIS_VOLATILE (clobber
) = 1;
3143 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3148 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3150 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3151 / TYPE_VECTOR_SUBPARTS (rtype
));
3152 gcc_assert ((k
& (k
- 1)) == 0);
3153 if ((j
& (k
- 1)) == 0)
3154 vec_alloc (ret_ctor_elts
, k
);
3157 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3158 for (m
= 0; m
< o
; m
++)
3160 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3161 size_int (m
), NULL_TREE
, NULL_TREE
);
3163 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3164 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3165 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3166 gimple_assign_lhs (new_stmt
));
3168 tree clobber
= build_constructor (ratype
, NULL
);
3169 TREE_THIS_VOLATILE (clobber
) = 1;
3170 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3171 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3174 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3175 if ((j
& (k
- 1)) != k
- 1)
3177 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3179 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3180 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3182 if ((unsigned) j
== k
- 1)
3183 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3185 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3187 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3192 tree t
= build_fold_addr_expr (new_temp
);
3193 t
= build2 (MEM_REF
, vectype
, t
,
3194 build_int_cst (TREE_TYPE (t
), 0));
3196 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3197 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3198 tree clobber
= build_constructor (ratype
, NULL
);
3199 TREE_THIS_VOLATILE (clobber
) = 1;
3200 vect_finish_stmt_generation (stmt
,
3201 gimple_build_assign (new_temp
,
3207 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3209 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3211 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3216 /* The call in STMT might prevent it from being removed in dce.
3217 We however cannot remove it here, due to the way the ssa name
3218 it defines is mapped to the new definition. So just replace
3219 rhs of the statement with something harmless. */
3226 type
= TREE_TYPE (scalar_dest
);
3227 if (is_pattern_stmt_p (stmt_info
))
3228 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3230 lhs
= gimple_call_lhs (stmt
);
3231 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3234 new_stmt
= gimple_build_nop ();
3235 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3236 set_vinfo_for_stmt (stmt
, NULL
);
3237 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3238 gsi_replace (gsi
, new_stmt
, true);
3239 unlink_stmt_vdef (stmt
);
3245 /* Function vect_gen_widened_results_half
3247 Create a vector stmt whose code, type, number of arguments, and result
3248 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3249 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3250 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3251 needs to be created (DECL is a function-decl of a target-builtin).
3252 STMT is the original scalar stmt that we are vectorizing. */
3255 vect_gen_widened_results_half (enum tree_code code
,
3257 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3258 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3264 /* Generate half of the widened result: */
3265 if (code
== CALL_EXPR
)
3267 /* Target specific support */
3268 if (op_type
== binary_op
)
3269 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3271 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3272 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3273 gimple_call_set_lhs (new_stmt
, new_temp
);
3277 /* Generic support */
3278 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3279 if (op_type
!= binary_op
)
3281 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3282 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3283 gimple_assign_set_lhs (new_stmt
, new_temp
);
3285 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3291 /* Get vectorized definitions for loop-based vectorization. For the first
3292 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3293 scalar operand), and for the rest we get a copy with
3294 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3295 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3296 The vectors are collected into VEC_OPRNDS. */
3299 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3300 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3304 /* Get first vector operand. */
3305 /* All the vector operands except the very first one (that is scalar oprnd)
3307 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3308 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3310 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3312 vec_oprnds
->quick_push (vec_oprnd
);
3314 /* Get second vector operand. */
3315 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3316 vec_oprnds
->quick_push (vec_oprnd
);
3320 /* For conversion in multiple steps, continue to get operands
3323 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3327 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3328 For multi-step conversions store the resulting vectors and call the function
3332 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3333 int multi_step_cvt
, gimple stmt
,
3335 gimple_stmt_iterator
*gsi
,
3336 slp_tree slp_node
, enum tree_code code
,
3337 stmt_vec_info
*prev_stmt_info
)
3340 tree vop0
, vop1
, new_tmp
, vec_dest
;
3342 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3344 vec_dest
= vec_dsts
.pop ();
3346 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3348 /* Create demotion operation. */
3349 vop0
= (*vec_oprnds
)[i
];
3350 vop1
= (*vec_oprnds
)[i
+ 1];
3351 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3352 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3353 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3354 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3357 /* Store the resulting vector for next recursive call. */
3358 (*vec_oprnds
)[i
/2] = new_tmp
;
3361 /* This is the last step of the conversion sequence. Store the
3362 vectors in SLP_NODE or in vector info of the scalar statement
3363 (or in STMT_VINFO_RELATED_STMT chain). */
3365 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3368 if (!*prev_stmt_info
)
3369 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3371 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3373 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3378 /* For multi-step demotion operations we first generate demotion operations
3379 from the source type to the intermediate types, and then combine the
3380 results (stored in VEC_OPRNDS) in demotion operation to the destination
3384 /* At each level of recursion we have half of the operands we had at the
3386 vec_oprnds
->truncate ((i
+1)/2);
3387 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3388 stmt
, vec_dsts
, gsi
, slp_node
,
3389 VEC_PACK_TRUNC_EXPR
,
3393 vec_dsts
.quick_push (vec_dest
);
3397 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3398 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3399 the resulting vectors and call the function recursively. */
3402 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3403 vec
<tree
> *vec_oprnds1
,
3404 gimple stmt
, tree vec_dest
,
3405 gimple_stmt_iterator
*gsi
,
3406 enum tree_code code1
,
3407 enum tree_code code2
, tree decl1
,
3408 tree decl2
, int op_type
)
3411 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3412 gimple new_stmt1
, new_stmt2
;
3413 vec
<tree
> vec_tmp
= vNULL
;
3415 vec_tmp
.create (vec_oprnds0
->length () * 2);
3416 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3418 if (op_type
== binary_op
)
3419 vop1
= (*vec_oprnds1
)[i
];
3423 /* Generate the two halves of promotion operation. */
3424 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3425 op_type
, vec_dest
, gsi
, stmt
);
3426 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3427 op_type
, vec_dest
, gsi
, stmt
);
3428 if (is_gimple_call (new_stmt1
))
3430 new_tmp1
= gimple_call_lhs (new_stmt1
);
3431 new_tmp2
= gimple_call_lhs (new_stmt2
);
3435 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3436 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3439 /* Store the results for the next step. */
3440 vec_tmp
.quick_push (new_tmp1
);
3441 vec_tmp
.quick_push (new_tmp2
);
3444 vec_oprnds0
->release ();
3445 *vec_oprnds0
= vec_tmp
;
3449 /* Check if STMT performs a conversion operation, that can be vectorized.
3450 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3451 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3452 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3455 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3456 gimple
*vec_stmt
, slp_tree slp_node
)
3460 tree op0
, op1
= NULL_TREE
;
3461 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3462 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3463 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3464 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3465 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3466 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3470 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3471 gimple new_stmt
= NULL
;
3472 stmt_vec_info prev_stmt_info
;
3475 tree vectype_out
, vectype_in
;
3477 tree lhs_type
, rhs_type
;
3478 enum { NARROW
, NONE
, WIDEN
} modifier
;
3479 vec
<tree
> vec_oprnds0
= vNULL
;
3480 vec
<tree
> vec_oprnds1
= vNULL
;
3482 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3483 int multi_step_cvt
= 0;
3484 vec
<tree
> vec_dsts
= vNULL
;
3485 vec
<tree
> interm_types
= vNULL
;
3486 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3488 machine_mode rhs_mode
;
3489 unsigned short fltsz
;
3491 /* Is STMT a vectorizable conversion? */
3493 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3496 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3499 if (!is_gimple_assign (stmt
))
3502 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3505 code
= gimple_assign_rhs_code (stmt
);
3506 if (!CONVERT_EXPR_CODE_P (code
)
3507 && code
!= FIX_TRUNC_EXPR
3508 && code
!= FLOAT_EXPR
3509 && code
!= WIDEN_MULT_EXPR
3510 && code
!= WIDEN_LSHIFT_EXPR
)
3513 op_type
= TREE_CODE_LENGTH (code
);
3515 /* Check types of lhs and rhs. */
3516 scalar_dest
= gimple_assign_lhs (stmt
);
3517 lhs_type
= TREE_TYPE (scalar_dest
);
3518 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3520 op0
= gimple_assign_rhs1 (stmt
);
3521 rhs_type
= TREE_TYPE (op0
);
3523 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3524 && !((INTEGRAL_TYPE_P (lhs_type
)
3525 && INTEGRAL_TYPE_P (rhs_type
))
3526 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3527 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3530 if ((INTEGRAL_TYPE_P (lhs_type
)
3531 && (TYPE_PRECISION (lhs_type
)
3532 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3533 || (INTEGRAL_TYPE_P (rhs_type
)
3534 && (TYPE_PRECISION (rhs_type
)
3535 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3537 if (dump_enabled_p ())
3538 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3539 "type conversion to/from bit-precision unsupported."
3544 /* Check the operands of the operation. */
3545 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3546 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3548 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3550 "use not simple.\n");
3553 if (op_type
== binary_op
)
3557 op1
= gimple_assign_rhs2 (stmt
);
3558 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3559 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3561 if (CONSTANT_CLASS_P (op0
))
3562 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3563 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3565 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3570 if (dump_enabled_p ())
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3572 "use not simple.\n");
3577 /* If op0 is an external or constant defs use a vector type of
3578 the same size as the output vector type. */
3580 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3582 gcc_assert (vectype_in
);
3585 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3588 "no vectype for scalar type ");
3589 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3590 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3596 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3597 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3598 if (nunits_in
< nunits_out
)
3600 else if (nunits_out
== nunits_in
)
3605 /* Multiple types in SLP are handled by creating the appropriate number of
3606 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3608 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3610 else if (modifier
== NARROW
)
3611 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3613 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3615 /* Sanity check: make sure that at least one copy of the vectorized stmt
3616 needs to be generated. */
3617 gcc_assert (ncopies
>= 1);
3619 /* Supportable by target? */
3623 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3625 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3630 if (dump_enabled_p ())
3631 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3632 "conversion not supported by target.\n");
3636 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3637 &code1
, &code2
, &multi_step_cvt
,
3640 /* Binary widening operation can only be supported directly by the
3642 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3646 if (code
!= FLOAT_EXPR
3647 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3648 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3651 rhs_mode
= TYPE_MODE (rhs_type
);
3652 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3653 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3654 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3655 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3658 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3659 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3660 if (cvt_type
== NULL_TREE
)
3663 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3665 if (!supportable_convert_operation (code
, vectype_out
,
3666 cvt_type
, &decl1
, &codecvt1
))
3669 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3670 cvt_type
, &codecvt1
,
3671 &codecvt2
, &multi_step_cvt
,
3675 gcc_assert (multi_step_cvt
== 0);
3677 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3678 vectype_in
, &code1
, &code2
,
3679 &multi_step_cvt
, &interm_types
))
3683 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3686 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3687 codecvt2
= ERROR_MARK
;
3691 interm_types
.safe_push (cvt_type
);
3692 cvt_type
= NULL_TREE
;
3697 gcc_assert (op_type
== unary_op
);
3698 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3699 &code1
, &multi_step_cvt
,
3703 if (code
!= FIX_TRUNC_EXPR
3704 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3705 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3708 rhs_mode
= TYPE_MODE (rhs_type
);
3710 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3711 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3712 if (cvt_type
== NULL_TREE
)
3714 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3717 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3718 &code1
, &multi_step_cvt
,
3727 if (!vec_stmt
) /* transformation not required. */
3729 if (dump_enabled_p ())
3730 dump_printf_loc (MSG_NOTE
, vect_location
,
3731 "=== vectorizable_conversion ===\n");
3732 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3734 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3735 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3737 else if (modifier
== NARROW
)
3739 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3740 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3744 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3745 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3747 interm_types
.release ();
3752 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_NOTE
, vect_location
,
3754 "transform conversion. ncopies = %d.\n", ncopies
);
3756 if (op_type
== binary_op
)
3758 if (CONSTANT_CLASS_P (op0
))
3759 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3760 else if (CONSTANT_CLASS_P (op1
))
3761 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3764 /* In case of multi-step conversion, we first generate conversion operations
3765 to the intermediate types, and then from that types to the final one.
3766 We create vector destinations for the intermediate type (TYPES) received
3767 from supportable_*_operation, and store them in the correct order
3768 for future use in vect_create_vectorized_*_stmts (). */
3769 vec_dsts
.create (multi_step_cvt
+ 1);
3770 vec_dest
= vect_create_destination_var (scalar_dest
,
3771 (cvt_type
&& modifier
== WIDEN
)
3772 ? cvt_type
: vectype_out
);
3773 vec_dsts
.quick_push (vec_dest
);
3777 for (i
= interm_types
.length () - 1;
3778 interm_types
.iterate (i
, &intermediate_type
); i
--)
3780 vec_dest
= vect_create_destination_var (scalar_dest
,
3782 vec_dsts
.quick_push (vec_dest
);
3787 vec_dest
= vect_create_destination_var (scalar_dest
,
3789 ? vectype_out
: cvt_type
);
3793 if (modifier
== WIDEN
)
3795 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3796 if (op_type
== binary_op
)
3797 vec_oprnds1
.create (1);
3799 else if (modifier
== NARROW
)
3800 vec_oprnds0
.create (
3801 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3803 else if (code
== WIDEN_LSHIFT_EXPR
)
3804 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3807 prev_stmt_info
= NULL
;
3811 for (j
= 0; j
< ncopies
; j
++)
3814 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3817 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3819 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3821 /* Arguments are ready, create the new vector stmt. */
3822 if (code1
== CALL_EXPR
)
3824 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3825 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3826 gimple_call_set_lhs (new_stmt
, new_temp
);
3830 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3831 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3832 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3833 gimple_assign_set_lhs (new_stmt
, new_temp
);
3836 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3838 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3842 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3844 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3845 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3850 /* In case the vectorization factor (VF) is bigger than the number
3851 of elements that we can fit in a vectype (nunits), we have to
3852 generate more than one vector stmt - i.e - we need to "unroll"
3853 the vector stmt by a factor VF/nunits. */
3854 for (j
= 0; j
< ncopies
; j
++)
3861 if (code
== WIDEN_LSHIFT_EXPR
)
3866 /* Store vec_oprnd1 for every vector stmt to be created
3867 for SLP_NODE. We check during the analysis that all
3868 the shift arguments are the same. */
3869 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3870 vec_oprnds1
.quick_push (vec_oprnd1
);
3872 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3876 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3877 &vec_oprnds1
, slp_node
, -1);
3881 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3882 vec_oprnds0
.quick_push (vec_oprnd0
);
3883 if (op_type
== binary_op
)
3885 if (code
== WIDEN_LSHIFT_EXPR
)
3888 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3890 vec_oprnds1
.quick_push (vec_oprnd1
);
3896 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3897 vec_oprnds0
.truncate (0);
3898 vec_oprnds0
.quick_push (vec_oprnd0
);
3899 if (op_type
== binary_op
)
3901 if (code
== WIDEN_LSHIFT_EXPR
)
3904 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3906 vec_oprnds1
.truncate (0);
3907 vec_oprnds1
.quick_push (vec_oprnd1
);
3911 /* Arguments are ready. Create the new vector stmts. */
3912 for (i
= multi_step_cvt
; i
>= 0; i
--)
3914 tree this_dest
= vec_dsts
[i
];
3915 enum tree_code c1
= code1
, c2
= code2
;
3916 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3921 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3923 stmt
, this_dest
, gsi
,
3924 c1
, c2
, decl1
, decl2
,
3928 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3932 if (codecvt1
== CALL_EXPR
)
3934 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3935 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3936 gimple_call_set_lhs (new_stmt
, new_temp
);
3940 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3941 new_temp
= make_ssa_name (vec_dest
);
3942 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
3946 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3949 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3952 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3955 if (!prev_stmt_info
)
3956 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3958 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3959 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3964 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3968 /* In case the vectorization factor (VF) is bigger than the number
3969 of elements that we can fit in a vectype (nunits), we have to
3970 generate more than one vector stmt - i.e - we need to "unroll"
3971 the vector stmt by a factor VF/nunits. */
3972 for (j
= 0; j
< ncopies
; j
++)
3976 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3980 vec_oprnds0
.truncate (0);
3981 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3982 vect_pow2 (multi_step_cvt
) - 1);
3985 /* Arguments are ready. Create the new vector stmts. */
3987 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3989 if (codecvt1
== CALL_EXPR
)
3991 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3992 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3993 gimple_call_set_lhs (new_stmt
, new_temp
);
3997 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3998 new_temp
= make_ssa_name (vec_dest
);
3999 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4003 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4004 vec_oprnds0
[i
] = new_temp
;
4007 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4008 stmt
, vec_dsts
, gsi
,
4013 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4017 vec_oprnds0
.release ();
4018 vec_oprnds1
.release ();
4019 vec_dsts
.release ();
4020 interm_types
.release ();
4026 /* Function vectorizable_assignment.
4028 Check if STMT performs an assignment (copy) that can be vectorized.
4029 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4030 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4031 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4034 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4035 gimple
*vec_stmt
, slp_tree slp_node
)
4040 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4041 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4042 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4046 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4047 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4050 vec
<tree
> vec_oprnds
= vNULL
;
4052 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4053 gimple new_stmt
= NULL
;
4054 stmt_vec_info prev_stmt_info
= NULL
;
4055 enum tree_code code
;
4058 /* Multiple types in SLP are handled by creating the appropriate number of
4059 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4061 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4064 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4066 gcc_assert (ncopies
>= 1);
4068 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4071 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4074 /* Is vectorizable assignment? */
4075 if (!is_gimple_assign (stmt
))
4078 scalar_dest
= gimple_assign_lhs (stmt
);
4079 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4082 code
= gimple_assign_rhs_code (stmt
);
4083 if (gimple_assign_single_p (stmt
)
4084 || code
== PAREN_EXPR
4085 || CONVERT_EXPR_CODE_P (code
))
4086 op
= gimple_assign_rhs1 (stmt
);
4090 if (code
== VIEW_CONVERT_EXPR
)
4091 op
= TREE_OPERAND (op
, 0);
4093 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4094 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4096 if (dump_enabled_p ())
4097 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4098 "use not simple.\n");
4102 /* We can handle NOP_EXPR conversions that do not change the number
4103 of elements or the vector size. */
4104 if ((CONVERT_EXPR_CODE_P (code
)
4105 || code
== VIEW_CONVERT_EXPR
)
4107 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4108 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4109 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4112 /* We do not handle bit-precision changes. */
4113 if ((CONVERT_EXPR_CODE_P (code
)
4114 || code
== VIEW_CONVERT_EXPR
)
4115 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4116 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4117 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4118 || ((TYPE_PRECISION (TREE_TYPE (op
))
4119 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4120 /* But a conversion that does not change the bit-pattern is ok. */
4121 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4122 > TYPE_PRECISION (TREE_TYPE (op
)))
4123 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4125 if (dump_enabled_p ())
4126 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4127 "type conversion to/from bit-precision "
4132 if (!vec_stmt
) /* transformation not required. */
4134 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_NOTE
, vect_location
,
4137 "=== vectorizable_assignment ===\n");
4138 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4143 if (dump_enabled_p ())
4144 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4147 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4150 for (j
= 0; j
< ncopies
; j
++)
4154 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4156 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4158 /* Arguments are ready. create the new vector stmt. */
4159 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4161 if (CONVERT_EXPR_CODE_P (code
)
4162 || code
== VIEW_CONVERT_EXPR
)
4163 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4164 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4165 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4166 gimple_assign_set_lhs (new_stmt
, new_temp
);
4167 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4169 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4176 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4178 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4180 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4183 vec_oprnds
.release ();
4188 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4189 either as shift by a scalar or by a vector. */
4192 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4195 machine_mode vec_mode
;
4200 vectype
= get_vectype_for_scalar_type (scalar_type
);
4204 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4206 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4208 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4210 || (optab_handler (optab
, TYPE_MODE (vectype
))
4211 == CODE_FOR_nothing
))
4215 vec_mode
= TYPE_MODE (vectype
);
4216 icode
= (int) optab_handler (optab
, vec_mode
);
4217 if (icode
== CODE_FOR_nothing
)
4224 /* Function vectorizable_shift.
4226 Check if STMT performs a shift operation that can be vectorized.
4227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4228 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4232 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4233 gimple
*vec_stmt
, slp_tree slp_node
)
4237 tree op0
, op1
= NULL
;
4238 tree vec_oprnd1
= NULL_TREE
;
4239 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4241 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4242 enum tree_code code
;
4243 machine_mode vec_mode
;
4247 machine_mode optab_op2_mode
;
4250 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4251 gimple new_stmt
= NULL
;
4252 stmt_vec_info prev_stmt_info
;
4259 vec
<tree
> vec_oprnds0
= vNULL
;
4260 vec
<tree
> vec_oprnds1
= vNULL
;
4263 bool scalar_shift_arg
= true;
4264 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4267 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4270 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4273 /* Is STMT a vectorizable binary/unary operation? */
4274 if (!is_gimple_assign (stmt
))
4277 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4280 code
= gimple_assign_rhs_code (stmt
);
4282 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4283 || code
== RROTATE_EXPR
))
4286 scalar_dest
= gimple_assign_lhs (stmt
);
4287 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4288 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4289 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4291 if (dump_enabled_p ())
4292 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4293 "bit-precision shifts not supported.\n");
4297 op0
= gimple_assign_rhs1 (stmt
);
4298 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4299 &def_stmt
, &def
, &dt
[0], &vectype
))
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4303 "use not simple.\n");
4306 /* If op0 is an external or constant def use a vector type with
4307 the same size as the output vector type. */
4309 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4311 gcc_assert (vectype
);
4314 if (dump_enabled_p ())
4315 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4316 "no vectype for scalar type\n");
4320 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4321 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4322 if (nunits_out
!= nunits_in
)
4325 op1
= gimple_assign_rhs2 (stmt
);
4326 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4327 &def
, &dt
[1], &op1_vectype
))
4329 if (dump_enabled_p ())
4330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4331 "use not simple.\n");
4336 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4340 /* Multiple types in SLP are handled by creating the appropriate number of
4341 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4343 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4346 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4348 gcc_assert (ncopies
>= 1);
4350 /* Determine whether the shift amount is a vector, or scalar. If the
4351 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4353 if (dt
[1] == vect_internal_def
&& !slp_node
)
4354 scalar_shift_arg
= false;
4355 else if (dt
[1] == vect_constant_def
4356 || dt
[1] == vect_external_def
4357 || dt
[1] == vect_internal_def
)
4359 /* In SLP, need to check whether the shift count is the same,
4360 in loops if it is a constant or invariant, it is always
4364 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4367 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4368 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4369 scalar_shift_arg
= false;
4374 if (dump_enabled_p ())
4375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4376 "operand mode requires invariant argument.\n");
4380 /* Vector shifted by vector. */
4381 if (!scalar_shift_arg
)
4383 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4384 if (dump_enabled_p ())
4385 dump_printf_loc (MSG_NOTE
, vect_location
,
4386 "vector/vector shift/rotate found.\n");
4389 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4390 if (op1_vectype
== NULL_TREE
4391 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4393 if (dump_enabled_p ())
4394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4395 "unusable type for last operand in"
4396 " vector/vector shift/rotate.\n");
4400 /* See if the machine has a vector shifted by scalar insn and if not
4401 then see if it has a vector shifted by vector insn. */
4404 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4406 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4408 if (dump_enabled_p ())
4409 dump_printf_loc (MSG_NOTE
, vect_location
,
4410 "vector/scalar shift/rotate found.\n");
4414 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4416 && (optab_handler (optab
, TYPE_MODE (vectype
))
4417 != CODE_FOR_nothing
))
4419 scalar_shift_arg
= false;
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_NOTE
, vect_location
,
4423 "vector/vector shift/rotate found.\n");
4425 /* Unlike the other binary operators, shifts/rotates have
4426 the rhs being int, instead of the same type as the lhs,
4427 so make sure the scalar is the right type if we are
4428 dealing with vectors of long long/long/short/char. */
4429 if (dt
[1] == vect_constant_def
)
4430 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4431 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4435 && TYPE_MODE (TREE_TYPE (vectype
))
4436 != TYPE_MODE (TREE_TYPE (op1
)))
4438 if (dump_enabled_p ())
4439 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4440 "unusable type for last operand in"
4441 " vector/vector shift/rotate.\n");
4444 if (vec_stmt
&& !slp_node
)
4446 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4447 op1
= vect_init_vector (stmt
, op1
,
4448 TREE_TYPE (vectype
), NULL
);
4455 /* Supportable by target? */
4458 if (dump_enabled_p ())
4459 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4463 vec_mode
= TYPE_MODE (vectype
);
4464 icode
= (int) optab_handler (optab
, vec_mode
);
4465 if (icode
== CODE_FOR_nothing
)
4467 if (dump_enabled_p ())
4468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4469 "op not supported by target.\n");
4470 /* Check only during analysis. */
4471 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4472 || (vf
< vect_min_worthwhile_factor (code
)
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_NOTE
, vect_location
,
4477 "proceeding using word mode.\n");
4480 /* Worthwhile without SIMD support? Check only during analysis. */
4481 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4482 && vf
< vect_min_worthwhile_factor (code
)
4485 if (dump_enabled_p ())
4486 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4487 "not worthwhile without SIMD support.\n");
4491 if (!vec_stmt
) /* transformation not required. */
4493 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4494 if (dump_enabled_p ())
4495 dump_printf_loc (MSG_NOTE
, vect_location
,
4496 "=== vectorizable_shift ===\n");
4497 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4503 if (dump_enabled_p ())
4504 dump_printf_loc (MSG_NOTE
, vect_location
,
4505 "transform binary/unary operation.\n");
4508 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4510 prev_stmt_info
= NULL
;
4511 for (j
= 0; j
< ncopies
; j
++)
4516 if (scalar_shift_arg
)
4518 /* Vector shl and shr insn patterns can be defined with scalar
4519 operand 2 (shift operand). In this case, use constant or loop
4520 invariant op1 directly, without extending it to vector mode
4522 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4523 if (!VECTOR_MODE_P (optab_op2_mode
))
4525 if (dump_enabled_p ())
4526 dump_printf_loc (MSG_NOTE
, vect_location
,
4527 "operand 1 using scalar mode.\n");
4529 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4530 vec_oprnds1
.quick_push (vec_oprnd1
);
4533 /* Store vec_oprnd1 for every vector stmt to be created
4534 for SLP_NODE. We check during the analysis that all
4535 the shift arguments are the same.
4536 TODO: Allow different constants for different vector
4537 stmts generated for an SLP instance. */
4538 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4539 vec_oprnds1
.quick_push (vec_oprnd1
);
4544 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4545 (a special case for certain kind of vector shifts); otherwise,
4546 operand 1 should be of a vector type (the usual case). */
4548 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4551 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4555 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4557 /* Arguments are ready. Create the new vector stmt. */
4558 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4560 vop1
= vec_oprnds1
[i
];
4561 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4562 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4563 gimple_assign_set_lhs (new_stmt
, new_temp
);
4564 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4566 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4573 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4575 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4576 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4579 vec_oprnds0
.release ();
4580 vec_oprnds1
.release ();
4586 /* Function vectorizable_operation.
4588 Check if STMT performs a binary, unary or ternary operation that can
4590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4591 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4595 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4596 gimple
*vec_stmt
, slp_tree slp_node
)
4600 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4601 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4603 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4604 enum tree_code code
;
4605 machine_mode vec_mode
;
4612 enum vect_def_type dt
[3]
4613 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4614 gimple new_stmt
= NULL
;
4615 stmt_vec_info prev_stmt_info
;
4621 vec
<tree
> vec_oprnds0
= vNULL
;
4622 vec
<tree
> vec_oprnds1
= vNULL
;
4623 vec
<tree
> vec_oprnds2
= vNULL
;
4624 tree vop0
, vop1
, vop2
;
4625 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4628 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4631 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4634 /* Is STMT a vectorizable binary/unary operation? */
4635 if (!is_gimple_assign (stmt
))
4638 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4641 code
= gimple_assign_rhs_code (stmt
);
4643 /* For pointer addition, we should use the normal plus for
4644 the vector addition. */
4645 if (code
== POINTER_PLUS_EXPR
)
4648 /* Support only unary or binary operations. */
4649 op_type
= TREE_CODE_LENGTH (code
);
4650 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4652 if (dump_enabled_p ())
4653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4654 "num. args = %d (not unary/binary/ternary op).\n",
4659 scalar_dest
= gimple_assign_lhs (stmt
);
4660 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4662 /* Most operations cannot handle bit-precision types without extra
4664 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4665 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4666 /* Exception are bitwise binary operations. */
4667 && code
!= BIT_IOR_EXPR
4668 && code
!= BIT_XOR_EXPR
4669 && code
!= BIT_AND_EXPR
)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4673 "bit-precision arithmetic not supported.\n");
4677 op0
= gimple_assign_rhs1 (stmt
);
4678 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4679 &def_stmt
, &def
, &dt
[0], &vectype
))
4681 if (dump_enabled_p ())
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4683 "use not simple.\n");
4686 /* If op0 is an external or constant def use a vector type with
4687 the same size as the output vector type. */
4689 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4691 gcc_assert (vectype
);
4694 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4697 "no vectype for scalar type ");
4698 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4700 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4706 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4707 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4708 if (nunits_out
!= nunits_in
)
4711 if (op_type
== binary_op
|| op_type
== ternary_op
)
4713 op1
= gimple_assign_rhs2 (stmt
);
4714 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4717 if (dump_enabled_p ())
4718 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4719 "use not simple.\n");
4723 if (op_type
== ternary_op
)
4725 op2
= gimple_assign_rhs3 (stmt
);
4726 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4731 "use not simple.\n");
4737 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4741 /* Multiple types in SLP are handled by creating the appropriate number of
4742 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4744 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4747 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4749 gcc_assert (ncopies
>= 1);
4751 /* Shifts are handled in vectorizable_shift (). */
4752 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4753 || code
== RROTATE_EXPR
)
4756 /* Supportable by target? */
4758 vec_mode
= TYPE_MODE (vectype
);
4759 if (code
== MULT_HIGHPART_EXPR
)
4761 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4762 icode
= LAST_INSN_CODE
;
4764 icode
= CODE_FOR_nothing
;
4768 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4776 icode
= (int) optab_handler (optab
, vec_mode
);
4779 if (icode
== CODE_FOR_nothing
)
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4783 "op not supported by target.\n");
4784 /* Check only during analysis. */
4785 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4786 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_NOTE
, vect_location
,
4790 "proceeding using word mode.\n");
4793 /* Worthwhile without SIMD support? Check only during analysis. */
4794 if (!VECTOR_MODE_P (vec_mode
)
4796 && vf
< vect_min_worthwhile_factor (code
))
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4800 "not worthwhile without SIMD support.\n");
4804 if (!vec_stmt
) /* transformation not required. */
4806 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4807 if (dump_enabled_p ())
4808 dump_printf_loc (MSG_NOTE
, vect_location
,
4809 "=== vectorizable_operation ===\n");
4810 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4816 if (dump_enabled_p ())
4817 dump_printf_loc (MSG_NOTE
, vect_location
,
4818 "transform binary/unary operation.\n");
4821 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4823 /* In case the vectorization factor (VF) is bigger than the number
4824 of elements that we can fit in a vectype (nunits), we have to generate
4825 more than one vector stmt - i.e - we need to "unroll" the
4826 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4827 from one copy of the vector stmt to the next, in the field
4828 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4829 stages to find the correct vector defs to be used when vectorizing
4830 stmts that use the defs of the current stmt. The example below
4831 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4832 we need to create 4 vectorized stmts):
4834 before vectorization:
4835 RELATED_STMT VEC_STMT
4839 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4841 RELATED_STMT VEC_STMT
4842 VS1_0: vx0 = memref0 VS1_1 -
4843 VS1_1: vx1 = memref1 VS1_2 -
4844 VS1_2: vx2 = memref2 VS1_3 -
4845 VS1_3: vx3 = memref3 - -
4846 S1: x = load - VS1_0
4849 step2: vectorize stmt S2 (done here):
4850 To vectorize stmt S2 we first need to find the relevant vector
4851 def for the first operand 'x'. This is, as usual, obtained from
4852 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4853 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4854 relevant vector def 'vx0'. Having found 'vx0' we can generate
4855 the vector stmt VS2_0, and as usual, record it in the
4856 STMT_VINFO_VEC_STMT of stmt S2.
4857 When creating the second copy (VS2_1), we obtain the relevant vector
4858 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4859 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4860 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4861 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4862 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4863 chain of stmts and pointers:
4864 RELATED_STMT VEC_STMT
4865 VS1_0: vx0 = memref0 VS1_1 -
4866 VS1_1: vx1 = memref1 VS1_2 -
4867 VS1_2: vx2 = memref2 VS1_3 -
4868 VS1_3: vx3 = memref3 - -
4869 S1: x = load - VS1_0
4870 VS2_0: vz0 = vx0 + v1 VS2_1 -
4871 VS2_1: vz1 = vx1 + v1 VS2_2 -
4872 VS2_2: vz2 = vx2 + v1 VS2_3 -
4873 VS2_3: vz3 = vx3 + v1 - -
4874 S2: z = x + 1 - VS2_0 */
4876 prev_stmt_info
= NULL
;
4877 for (j
= 0; j
< ncopies
; j
++)
4882 if (op_type
== binary_op
|| op_type
== ternary_op
)
4883 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4886 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4888 if (op_type
== ternary_op
)
4890 vec_oprnds2
.create (1);
4891 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4898 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4899 if (op_type
== ternary_op
)
4901 tree vec_oprnd
= vec_oprnds2
.pop ();
4902 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4907 /* Arguments are ready. Create the new vector stmt. */
4908 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4910 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4911 ? vec_oprnds1
[i
] : NULL_TREE
);
4912 vop2
= ((op_type
== ternary_op
)
4913 ? vec_oprnds2
[i
] : NULL_TREE
);
4914 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
4915 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4916 gimple_assign_set_lhs (new_stmt
, new_temp
);
4917 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4919 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4926 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4928 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4929 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4932 vec_oprnds0
.release ();
4933 vec_oprnds1
.release ();
4934 vec_oprnds2
.release ();
4939 /* A helper function to ensure data reference DR's base alignment
4943 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4948 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4950 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4951 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4953 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4954 DECL_USER_ALIGN (base_decl
) = 1;
4955 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
4960 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4961 reversal of the vector elements. If that is impossible to do,
4965 perm_mask_for_reverse (tree vectype
)
4970 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4971 sel
= XALLOCAVEC (unsigned char, nunits
);
4973 for (i
= 0; i
< nunits
; ++i
)
4974 sel
[i
] = nunits
- 1 - i
;
4976 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4978 return vect_gen_perm_mask_checked (vectype
, sel
);
4981 /* Function vectorizable_store.
4983 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4985 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4986 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4987 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4990 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4996 tree vec_oprnd
= NULL_TREE
;
4997 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4998 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4999 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5001 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5002 struct loop
*loop
= NULL
;
5003 machine_mode vec_mode
;
5005 enum dr_alignment_support alignment_support_scheme
;
5008 enum vect_def_type dt
;
5009 stmt_vec_info prev_stmt_info
= NULL
;
5010 tree dataref_ptr
= NULL_TREE
;
5011 tree dataref_offset
= NULL_TREE
;
5012 gimple ptr_incr
= NULL
;
5013 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5016 gimple next_stmt
, first_stmt
= NULL
;
5017 bool grouped_store
= false;
5018 bool store_lanes_p
= false;
5019 unsigned int group_size
, i
;
5020 vec
<tree
> dr_chain
= vNULL
;
5021 vec
<tree
> oprnds
= vNULL
;
5022 vec
<tree
> result_chain
= vNULL
;
5024 bool negative
= false;
5025 tree offset
= NULL_TREE
;
5026 vec
<tree
> vec_oprnds
= vNULL
;
5027 bool slp
= (slp_node
!= NULL
);
5028 unsigned int vec_num
;
5029 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5033 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5035 /* Multiple types in SLP are handled by creating the appropriate number of
5036 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5038 if (slp
|| PURE_SLP_STMT (stmt_info
))
5041 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5043 gcc_assert (ncopies
>= 1);
5045 /* FORNOW. This restriction should be relaxed. */
5046 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5048 if (dump_enabled_p ())
5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5050 "multiple types in nested loop.\n");
5054 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5057 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5060 /* Is vectorizable store? */
5062 if (!is_gimple_assign (stmt
))
5065 scalar_dest
= gimple_assign_lhs (stmt
);
5066 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5067 && is_pattern_stmt_p (stmt_info
))
5068 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5069 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5070 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5071 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5072 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5073 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5074 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5075 && TREE_CODE (scalar_dest
) != MEM_REF
)
5078 gcc_assert (gimple_assign_single_p (stmt
));
5079 op
= gimple_assign_rhs1 (stmt
);
5080 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5085 "use not simple.\n");
5089 elem_type
= TREE_TYPE (vectype
);
5090 vec_mode
= TYPE_MODE (vectype
);
5092 /* FORNOW. In some cases can vectorize even if data-type not supported
5093 (e.g. - array initialization with 0). */
5094 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5097 if (!STMT_VINFO_DATA_REF (stmt_info
))
5101 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5102 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5103 size_zero_node
) < 0;
5104 if (negative
&& ncopies
> 1)
5106 if (dump_enabled_p ())
5107 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5108 "multiple types with negative step.\n");
5114 gcc_assert (!grouped_store
);
5115 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5116 if (alignment_support_scheme
!= dr_aligned
5117 && alignment_support_scheme
!= dr_unaligned_supported
)
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5121 "negative step but alignment required.\n");
5124 if (dt
!= vect_constant_def
5125 && dt
!= vect_external_def
5126 && !perm_mask_for_reverse (vectype
))
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5130 "negative step and reversing not supported.\n");
5135 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5137 grouped_store
= true;
5138 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5139 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5141 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5142 if (vect_store_lanes_supported (vectype
, group_size
))
5143 store_lanes_p
= true;
5144 else if (!vect_grouped_store_supported (vectype
, group_size
))
5148 if (first_stmt
== stmt
)
5150 /* STMT is the leader of the group. Check the operands of all the
5151 stmts of the group. */
5152 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5155 gcc_assert (gimple_assign_single_p (next_stmt
));
5156 op
= gimple_assign_rhs1 (next_stmt
);
5157 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5158 &def_stmt
, &def
, &dt
))
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5162 "use not simple.\n");
5165 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5170 if (!vec_stmt
) /* transformation not required. */
5172 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5173 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5180 ensure_base_align (stmt_info
, dr
);
5184 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5185 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5187 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5190 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5192 /* We vectorize all the stmts of the interleaving group when we
5193 reach the last stmt in the group. */
5194 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5195 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5204 grouped_store
= false;
5205 /* VEC_NUM is the number of vect stmts to be created for this
5207 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5208 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5209 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5210 op
= gimple_assign_rhs1 (first_stmt
);
5213 /* VEC_NUM is the number of vect stmts to be created for this
5215 vec_num
= group_size
;
5221 group_size
= vec_num
= 1;
5224 if (dump_enabled_p ())
5225 dump_printf_loc (MSG_NOTE
, vect_location
,
5226 "transform store. ncopies = %d\n", ncopies
);
5228 dr_chain
.create (group_size
);
5229 oprnds
.create (group_size
);
5231 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5232 gcc_assert (alignment_support_scheme
);
5233 /* Targets with store-lane instructions must not require explicit
5235 gcc_assert (!store_lanes_p
5236 || alignment_support_scheme
== dr_aligned
5237 || alignment_support_scheme
== dr_unaligned_supported
);
5240 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5243 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5245 aggr_type
= vectype
;
5247 /* In case the vectorization factor (VF) is bigger than the number
5248 of elements that we can fit in a vectype (nunits), we have to generate
5249 more than one vector stmt - i.e - we need to "unroll" the
5250 vector stmt by a factor VF/nunits. For more details see documentation in
5251 vect_get_vec_def_for_copy_stmt. */
5253 /* In case of interleaving (non-unit grouped access):
5260 We create vectorized stores starting from base address (the access of the
5261 first stmt in the chain (S2 in the above example), when the last store stmt
5262 of the chain (S4) is reached:
5265 VS2: &base + vec_size*1 = vx0
5266 VS3: &base + vec_size*2 = vx1
5267 VS4: &base + vec_size*3 = vx3
5269 Then permutation statements are generated:
5271 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5272 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5275 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5276 (the order of the data-refs in the output of vect_permute_store_chain
5277 corresponds to the order of scalar stmts in the interleaving chain - see
5278 the documentation of vect_permute_store_chain()).
5280 In case of both multiple types and interleaving, above vector stores and
5281 permutation stmts are created for every copy. The result vector stmts are
5282 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5283 STMT_VINFO_RELATED_STMT for the next copies.
5286 prev_stmt_info
= NULL
;
5287 for (j
= 0; j
< ncopies
; j
++)
5295 /* Get vectorized arguments for SLP_NODE. */
5296 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5297 NULL
, slp_node
, -1);
5299 vec_oprnd
= vec_oprnds
[0];
5303 /* For interleaved stores we collect vectorized defs for all the
5304 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5305 used as an input to vect_permute_store_chain(), and OPRNDS as
5306 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5308 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5309 OPRNDS are of size 1. */
5310 next_stmt
= first_stmt
;
5311 for (i
= 0; i
< group_size
; i
++)
5313 /* Since gaps are not supported for interleaved stores,
5314 GROUP_SIZE is the exact number of stmts in the chain.
5315 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5316 there is no interleaving, GROUP_SIZE is 1, and only one
5317 iteration of the loop will be executed. */
5318 gcc_assert (next_stmt
5319 && gimple_assign_single_p (next_stmt
));
5320 op
= gimple_assign_rhs1 (next_stmt
);
5322 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5324 dr_chain
.quick_push (vec_oprnd
);
5325 oprnds
.quick_push (vec_oprnd
);
5326 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5330 /* We should have catched mismatched types earlier. */
5331 gcc_assert (useless_type_conversion_p (vectype
,
5332 TREE_TYPE (vec_oprnd
)));
5333 bool simd_lane_access_p
5334 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5335 if (simd_lane_access_p
5336 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5337 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5338 && integer_zerop (DR_OFFSET (first_dr
))
5339 && integer_zerop (DR_INIT (first_dr
))
5340 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5341 get_alias_set (DR_REF (first_dr
))))
5343 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5344 dataref_offset
= build_int_cst (reference_alias_ptr_type
5345 (DR_REF (first_dr
)), 0);
5350 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5351 simd_lane_access_p
? loop
: NULL
,
5352 offset
, &dummy
, gsi
, &ptr_incr
,
5353 simd_lane_access_p
, &inv_p
);
5354 gcc_assert (bb_vinfo
|| !inv_p
);
5358 /* For interleaved stores we created vectorized defs for all the
5359 defs stored in OPRNDS in the previous iteration (previous copy).
5360 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5361 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5363 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5364 OPRNDS are of size 1. */
5365 for (i
= 0; i
< group_size
; i
++)
5368 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5370 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5371 dr_chain
[i
] = vec_oprnd
;
5372 oprnds
[i
] = vec_oprnd
;
5376 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5377 TYPE_SIZE_UNIT (aggr_type
));
5379 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5380 TYPE_SIZE_UNIT (aggr_type
));
5387 /* Combine all the vectors into an array. */
5388 vec_array
= create_vector_array (vectype
, vec_num
);
5389 for (i
= 0; i
< vec_num
; i
++)
5391 vec_oprnd
= dr_chain
[i
];
5392 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5396 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5397 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5398 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5399 gimple_call_set_lhs (new_stmt
, data_ref
);
5400 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5408 result_chain
.create (group_size
);
5410 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5414 next_stmt
= first_stmt
;
5415 for (i
= 0; i
< vec_num
; i
++)
5417 unsigned align
, misalign
;
5420 /* Bump the vector pointer. */
5421 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5425 vec_oprnd
= vec_oprnds
[i
];
5426 else if (grouped_store
)
5427 /* For grouped stores vectorized defs are interleaved in
5428 vect_permute_store_chain(). */
5429 vec_oprnd
= result_chain
[i
];
5431 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5434 : build_int_cst (reference_alias_ptr_type
5435 (DR_REF (first_dr
)), 0));
5436 align
= TYPE_ALIGN_UNIT (vectype
);
5437 if (aligned_access_p (first_dr
))
5439 else if (DR_MISALIGNMENT (first_dr
) == -1)
5441 TREE_TYPE (data_ref
)
5442 = build_aligned_type (TREE_TYPE (data_ref
),
5443 TYPE_ALIGN (elem_type
));
5444 align
= TYPE_ALIGN_UNIT (elem_type
);
5449 TREE_TYPE (data_ref
)
5450 = build_aligned_type (TREE_TYPE (data_ref
),
5451 TYPE_ALIGN (elem_type
));
5452 misalign
= DR_MISALIGNMENT (first_dr
);
5454 if (dataref_offset
== NULL_TREE
)
5455 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5459 && dt
!= vect_constant_def
5460 && dt
!= vect_external_def
)
5462 tree perm_mask
= perm_mask_for_reverse (vectype
);
5464 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5466 tree new_temp
= make_ssa_name (perm_dest
);
5468 /* Generate the permute statement. */
5470 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
5471 vec_oprnd
, perm_mask
);
5472 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5474 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5475 vec_oprnd
= new_temp
;
5478 /* Arguments are ready. Create the new vector stmt. */
5479 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5480 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5485 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5493 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5495 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5496 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5500 dr_chain
.release ();
5502 result_chain
.release ();
5503 vec_oprnds
.release ();
5508 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5509 VECTOR_CST mask. No checks are made that the target platform supports the
5510 mask, so callers may wish to test can_vec_perm_p separately, or use
5511 vect_gen_perm_mask_checked. */
5514 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
5516 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5519 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5521 mask_elt_type
= lang_hooks
.types
.type_for_mode
5522 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5523 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5525 mask_elts
= XALLOCAVEC (tree
, nunits
);
5526 for (i
= nunits
- 1; i
>= 0; i
--)
5527 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5528 mask_vec
= build_vector (mask_type
, mask_elts
);
5533 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5534 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5537 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
5539 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
5540 return vect_gen_perm_mask_any (vectype
, sel
);
5543 /* Given a vector variable X and Y, that was generated for the scalar
5544 STMT, generate instructions to permute the vector elements of X and Y
5545 using permutation mask MASK_VEC, insert them at *GSI and return the
5546 permuted vector variable. */
5549 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5550 gimple_stmt_iterator
*gsi
)
5552 tree vectype
= TREE_TYPE (x
);
5553 tree perm_dest
, data_ref
;
5556 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5557 data_ref
= make_ssa_name (perm_dest
);
5559 /* Generate the permute statement. */
5560 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
5561 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5566 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5567 inserting them on the loops preheader edge. Returns true if we
5568 were successful in doing so (and thus STMT can be moved then),
5569 otherwise returns false. */
5572 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5578 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5580 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5581 if (!gimple_nop_p (def_stmt
)
5582 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5584 /* Make sure we don't need to recurse. While we could do
5585 so in simple cases when there are more complex use webs
5586 we don't have an easy way to preserve stmt order to fulfil
5587 dependencies within them. */
5590 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5592 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5594 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5595 if (!gimple_nop_p (def_stmt2
)
5596 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5606 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5608 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5609 if (!gimple_nop_p (def_stmt
)
5610 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5612 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5613 gsi_remove (&gsi
, false);
5614 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5621 /* vectorizable_load.
5623 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5625 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5626 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5627 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5630 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5631 slp_tree slp_node
, slp_instance slp_node_instance
)
5634 tree vec_dest
= NULL
;
5635 tree data_ref
= NULL
;
5636 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5637 stmt_vec_info prev_stmt_info
;
5638 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5639 struct loop
*loop
= NULL
;
5640 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5641 bool nested_in_vect_loop
= false;
5642 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5643 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5647 gimple new_stmt
= NULL
;
5649 enum dr_alignment_support alignment_support_scheme
;
5650 tree dataref_ptr
= NULL_TREE
;
5651 tree dataref_offset
= NULL_TREE
;
5652 gimple ptr_incr
= NULL
;
5653 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5655 int i
, j
, group_size
, group_gap
;
5656 tree msq
= NULL_TREE
, lsq
;
5657 tree offset
= NULL_TREE
;
5658 tree byte_offset
= NULL_TREE
;
5659 tree realignment_token
= NULL_TREE
;
5661 vec
<tree
> dr_chain
= vNULL
;
5662 bool grouped_load
= false;
5663 bool load_lanes_p
= false;
5666 bool negative
= false;
5667 bool compute_in_loop
= false;
5668 struct loop
*at_loop
;
5670 bool slp
= (slp_node
!= NULL
);
5671 bool slp_perm
= false;
5672 enum tree_code code
;
5673 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5676 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5677 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5678 int gather_scale
= 1;
5679 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5683 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5684 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5685 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5690 /* Multiple types in SLP are handled by creating the appropriate number of
5691 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5693 if (slp
|| PURE_SLP_STMT (stmt_info
))
5696 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5698 gcc_assert (ncopies
>= 1);
5700 /* FORNOW. This restriction should be relaxed. */
5701 if (nested_in_vect_loop
&& ncopies
> 1)
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5705 "multiple types in nested loop.\n");
5709 /* Invalidate assumptions made by dependence analysis when vectorization
5710 on the unrolled body effectively re-orders stmts. */
5712 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5713 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5714 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5718 "cannot perform implicit CSE when unrolling "
5719 "with negative dependence distance\n");
5723 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5726 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5729 /* Is vectorizable load? */
5730 if (!is_gimple_assign (stmt
))
5733 scalar_dest
= gimple_assign_lhs (stmt
);
5734 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5737 code
= gimple_assign_rhs_code (stmt
);
5738 if (code
!= ARRAY_REF
5739 && code
!= BIT_FIELD_REF
5740 && code
!= INDIRECT_REF
5741 && code
!= COMPONENT_REF
5742 && code
!= IMAGPART_EXPR
5743 && code
!= REALPART_EXPR
5745 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5748 if (!STMT_VINFO_DATA_REF (stmt_info
))
5751 elem_type
= TREE_TYPE (vectype
);
5752 mode
= TYPE_MODE (vectype
);
5754 /* FORNOW. In some cases can vectorize even if data-type not supported
5755 (e.g. - data copies). */
5756 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5758 if (dump_enabled_p ())
5759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5760 "Aligned load, but unsupported type.\n");
5764 /* Check if the load is a part of an interleaving chain. */
5765 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5767 grouped_load
= true;
5769 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5771 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5772 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5774 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5775 if (vect_load_lanes_supported (vectype
, group_size
))
5776 load_lanes_p
= true;
5777 else if (!vect_grouped_load_supported (vectype
, group_size
))
5781 /* Invalidate assumptions made by dependence analysis when vectorization
5782 on the unrolled body effectively re-orders stmts. */
5783 if (!PURE_SLP_STMT (stmt_info
)
5784 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5785 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5786 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5788 if (dump_enabled_p ())
5789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5790 "cannot perform implicit CSE when performing "
5791 "group loads with negative dependence distance\n");
5797 if (STMT_VINFO_GATHER_P (stmt_info
))
5801 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5802 &gather_off
, &gather_scale
);
5803 gcc_assert (gather_decl
);
5804 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5805 &def_stmt
, &def
, &gather_dt
,
5806 &gather_off_vectype
))
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5810 "gather index use not simple.\n");
5814 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5818 negative
= tree_int_cst_compare (nested_in_vect_loop
5819 ? STMT_VINFO_DR_STEP (stmt_info
)
5821 size_zero_node
) < 0;
5822 if (negative
&& ncopies
> 1)
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5826 "multiple types with negative step.\n");
5834 if (dump_enabled_p ())
5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5836 "negative step for group load not supported"
5840 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5841 if (alignment_support_scheme
!= dr_aligned
5842 && alignment_support_scheme
!= dr_unaligned_supported
)
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5846 "negative step but alignment required.\n");
5849 if (!perm_mask_for_reverse (vectype
))
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5853 "negative step and reversing not supported."
5860 if (!vec_stmt
) /* transformation not required. */
5862 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
5863 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_NOTE
, vect_location
,
5869 "transform load. ncopies = %d\n", ncopies
);
5873 ensure_base_align (stmt_info
, dr
);
5875 if (STMT_VINFO_GATHER_P (stmt_info
))
5877 tree vec_oprnd0
= NULL_TREE
, op
;
5878 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
5879 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5880 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
5881 edge pe
= loop_preheader_edge (loop
);
5884 enum { NARROW
, NONE
, WIDEN
} modifier
;
5885 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
5887 if (nunits
== gather_off_nunits
)
5889 else if (nunits
== gather_off_nunits
/ 2)
5891 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
5894 for (i
= 0; i
< gather_off_nunits
; ++i
)
5895 sel
[i
] = i
| nunits
;
5897 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
5899 else if (nunits
== gather_off_nunits
* 2)
5901 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5904 for (i
= 0; i
< nunits
; ++i
)
5905 sel
[i
] = i
< gather_off_nunits
5906 ? i
: i
+ nunits
- gather_off_nunits
;
5908 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5914 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
5915 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5916 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5917 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5918 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5919 scaletype
= TREE_VALUE (arglist
);
5920 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
5922 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5924 ptr
= fold_convert (ptrtype
, gather_base
);
5925 if (!is_gimple_min_invariant (ptr
))
5927 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5928 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5929 gcc_assert (!new_bb
);
5932 /* Currently we support only unconditional gather loads,
5933 so mask should be all ones. */
5934 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
5935 mask
= build_int_cst (masktype
, -1);
5936 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
5938 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
5939 mask
= build_vector_from_val (masktype
, mask
);
5940 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5942 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
5946 for (j
= 0; j
< 6; ++j
)
5948 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
5949 mask
= build_real (TREE_TYPE (masktype
), r
);
5950 mask
= build_vector_from_val (masktype
, mask
);
5951 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5956 scale
= build_int_cst (scaletype
, gather_scale
);
5958 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
5959 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
5960 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
5964 for (j
= 0; j
< 6; ++j
)
5966 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
5967 merge
= build_real (TREE_TYPE (rettype
), r
);
5971 merge
= build_vector_from_val (rettype
, merge
);
5972 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
5974 prev_stmt_info
= NULL
;
5975 for (j
= 0; j
< ncopies
; ++j
)
5977 if (modifier
== WIDEN
&& (j
& 1))
5978 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
5979 perm_mask
, stmt
, gsi
);
5982 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
5985 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
5987 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5989 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5990 == TYPE_VECTOR_SUBPARTS (idxtype
));
5991 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5992 var
= make_ssa_name (var
);
5993 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5995 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5996 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6001 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6003 if (!useless_type_conversion_p (vectype
, rettype
))
6005 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6006 == TYPE_VECTOR_SUBPARTS (rettype
));
6007 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6008 op
= make_ssa_name (var
, new_stmt
);
6009 gimple_call_set_lhs (new_stmt
, op
);
6010 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6011 var
= make_ssa_name (vec_dest
);
6012 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6014 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6018 var
= make_ssa_name (vec_dest
, new_stmt
);
6019 gimple_call_set_lhs (new_stmt
, var
);
6022 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6024 if (modifier
== NARROW
)
6031 var
= permute_vec_elements (prev_res
, var
,
6032 perm_mask
, stmt
, gsi
);
6033 new_stmt
= SSA_NAME_DEF_STMT (var
);
6036 if (prev_stmt_info
== NULL
)
6037 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6039 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6040 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6044 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
6046 gimple_stmt_iterator incr_gsi
;
6052 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6053 gimple_seq stmts
= NULL
;
6054 tree stride_base
, stride_step
, alias_off
;
6056 gcc_assert (!nested_in_vect_loop
);
6059 = fold_build_pointer_plus
6060 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6061 size_binop (PLUS_EXPR
,
6062 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6063 convert_to_ptrofftype (DR_INIT (dr
))));
6064 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6066 /* For a load with loop-invariant (but other than power-of-2)
6067 stride (i.e. not a grouped access) like so:
6069 for (i = 0; i < n; i += stride)
6072 we generate a new induction variable and new accesses to
6073 form a new vector (or vectors, depending on ncopies):
6075 for (j = 0; ; j += VF*stride)
6077 tmp2 = array[j + stride];
6079 vectemp = {tmp1, tmp2, ...}
6082 ivstep
= stride_step
;
6083 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6084 build_int_cst (TREE_TYPE (ivstep
), vf
));
6086 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6088 create_iv (stride_base
, ivstep
, NULL
,
6089 loop
, &incr_gsi
, insert_after
,
6091 incr
= gsi_stmt (incr_gsi
);
6092 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6094 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6096 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6098 prev_stmt_info
= NULL
;
6099 running_off
= offvar
;
6100 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6101 for (j
= 0; j
< ncopies
; j
++)
6105 vec_alloc (v
, nunits
);
6106 for (i
= 0; i
< nunits
; i
++)
6108 tree newref
, newoff
;
6110 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
6111 running_off
, alias_off
);
6113 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6116 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6117 newoff
= copy_ssa_name (running_off
);
6118 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6119 running_off
, stride_step
);
6120 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6122 running_off
= newoff
;
6125 vec_inv
= build_constructor (vectype
, v
);
6126 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6127 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6130 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6132 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6133 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6140 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6142 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6143 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6144 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6146 /* Check if the chain of loads is already vectorized. */
6147 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6148 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6149 ??? But we can only do so if there is exactly one
6150 as we have no way to get at the rest. Leave the CSE
6152 ??? With the group load eventually participating
6153 in multiple different permutations (having multiple
6154 slp nodes which refer to the same group) the CSE
6155 is even wrong code. See PR56270. */
6158 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6161 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6162 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6164 /* VEC_NUM is the number of vect stmts to be created for this group. */
6167 grouped_load
= false;
6168 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6169 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6171 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6175 vec_num
= group_size
;
6183 group_size
= vec_num
= 1;
6187 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6188 gcc_assert (alignment_support_scheme
);
6189 /* Targets with load-lane instructions must not require explicit
6191 gcc_assert (!load_lanes_p
6192 || alignment_support_scheme
== dr_aligned
6193 || alignment_support_scheme
== dr_unaligned_supported
);
6195 /* In case the vectorization factor (VF) is bigger than the number
6196 of elements that we can fit in a vectype (nunits), we have to generate
6197 more than one vector stmt - i.e - we need to "unroll" the
6198 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6199 from one copy of the vector stmt to the next, in the field
6200 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6201 stages to find the correct vector defs to be used when vectorizing
6202 stmts that use the defs of the current stmt. The example below
6203 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6204 need to create 4 vectorized stmts):
6206 before vectorization:
6207 RELATED_STMT VEC_STMT
6211 step 1: vectorize stmt S1:
6212 We first create the vector stmt VS1_0, and, as usual, record a
6213 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6214 Next, we create the vector stmt VS1_1, and record a pointer to
6215 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6216 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6218 RELATED_STMT VEC_STMT
6219 VS1_0: vx0 = memref0 VS1_1 -
6220 VS1_1: vx1 = memref1 VS1_2 -
6221 VS1_2: vx2 = memref2 VS1_3 -
6222 VS1_3: vx3 = memref3 - -
6223 S1: x = load - VS1_0
6226 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6227 information we recorded in RELATED_STMT field is used to vectorize
6230 /* In case of interleaving (non-unit grouped access):
6237 Vectorized loads are created in the order of memory accesses
6238 starting from the access of the first stmt of the chain:
6241 VS2: vx1 = &base + vec_size*1
6242 VS3: vx3 = &base + vec_size*2
6243 VS4: vx4 = &base + vec_size*3
6245 Then permutation statements are generated:
6247 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6248 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6251 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6252 (the order of the data-refs in the output of vect_permute_load_chain
6253 corresponds to the order of scalar stmts in the interleaving chain - see
6254 the documentation of vect_permute_load_chain()).
6255 The generation of permutation stmts and recording them in
6256 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6258 In case of both multiple types and interleaving, the vector loads and
6259 permutation stmts above are created for every copy. The result vector
6260 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6261 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6263 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6264 on a target that supports unaligned accesses (dr_unaligned_supported)
6265 we generate the following code:
6269 p = p + indx * vectype_size;
6274 Otherwise, the data reference is potentially unaligned on a target that
6275 does not support unaligned accesses (dr_explicit_realign_optimized) -
6276 then generate the following code, in which the data in each iteration is
6277 obtained by two vector loads, one from the previous iteration, and one
6278 from the current iteration:
6280 msq_init = *(floor(p1))
6281 p2 = initial_addr + VS - 1;
6282 realignment_token = call target_builtin;
6285 p2 = p2 + indx * vectype_size
6287 vec_dest = realign_load (msq, lsq, realignment_token)
6292 /* If the misalignment remains the same throughout the execution of the
6293 loop, we can create the init_addr and permutation mask at the loop
6294 preheader. Otherwise, it needs to be created inside the loop.
6295 This can only occur when vectorizing memory accesses in the inner-loop
6296 nested within an outer-loop that is being vectorized. */
6298 if (nested_in_vect_loop
6299 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6300 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6302 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6303 compute_in_loop
= true;
6306 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6307 || alignment_support_scheme
== dr_explicit_realign
)
6308 && !compute_in_loop
)
6310 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6311 alignment_support_scheme
, NULL_TREE
,
6313 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6315 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6316 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6324 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6327 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6329 aggr_type
= vectype
;
6331 prev_stmt_info
= NULL
;
6332 for (j
= 0; j
< ncopies
; j
++)
6334 /* 1. Create the vector or array pointer update chain. */
6337 bool simd_lane_access_p
6338 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6339 if (simd_lane_access_p
6340 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6341 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6342 && integer_zerop (DR_OFFSET (first_dr
))
6343 && integer_zerop (DR_INIT (first_dr
))
6344 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6345 get_alias_set (DR_REF (first_dr
)))
6346 && (alignment_support_scheme
== dr_aligned
6347 || alignment_support_scheme
== dr_unaligned_supported
))
6349 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6350 dataref_offset
= build_int_cst (reference_alias_ptr_type
6351 (DR_REF (first_dr
)), 0);
6356 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6357 offset
, &dummy
, gsi
, &ptr_incr
,
6358 simd_lane_access_p
, &inv_p
,
6361 else if (dataref_offset
)
6362 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6363 TYPE_SIZE_UNIT (aggr_type
));
6365 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6366 TYPE_SIZE_UNIT (aggr_type
));
6368 if (grouped_load
|| slp_perm
)
6369 dr_chain
.create (vec_num
);
6375 vec_array
= create_vector_array (vectype
, vec_num
);
6378 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6379 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6380 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6381 gimple_call_set_lhs (new_stmt
, vec_array
);
6382 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6384 /* Extract each vector into an SSA_NAME. */
6385 for (i
= 0; i
< vec_num
; i
++)
6387 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6389 dr_chain
.quick_push (new_temp
);
6392 /* Record the mapping between SSA_NAMEs and statements. */
6393 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6397 for (i
= 0; i
< vec_num
; i
++)
6400 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6403 /* 2. Create the vector-load in the loop. */
6404 switch (alignment_support_scheme
)
6407 case dr_unaligned_supported
:
6409 unsigned int align
, misalign
;
6412 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6415 : build_int_cst (reference_alias_ptr_type
6416 (DR_REF (first_dr
)), 0));
6417 align
= TYPE_ALIGN_UNIT (vectype
);
6418 if (alignment_support_scheme
== dr_aligned
)
6420 gcc_assert (aligned_access_p (first_dr
));
6423 else if (DR_MISALIGNMENT (first_dr
) == -1)
6425 TREE_TYPE (data_ref
)
6426 = build_aligned_type (TREE_TYPE (data_ref
),
6427 TYPE_ALIGN (elem_type
));
6428 align
= TYPE_ALIGN_UNIT (elem_type
);
6433 TREE_TYPE (data_ref
)
6434 = build_aligned_type (TREE_TYPE (data_ref
),
6435 TYPE_ALIGN (elem_type
));
6436 misalign
= DR_MISALIGNMENT (first_dr
);
6438 if (dataref_offset
== NULL_TREE
)
6439 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6443 case dr_explicit_realign
:
6448 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6450 if (compute_in_loop
)
6451 msq
= vect_setup_realignment (first_stmt
, gsi
,
6453 dr_explicit_realign
,
6456 ptr
= copy_ssa_name (dataref_ptr
);
6457 new_stmt
= gimple_build_assign
6458 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
6460 (TREE_TYPE (dataref_ptr
),
6461 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6462 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6464 = build2 (MEM_REF
, vectype
, ptr
,
6465 build_int_cst (reference_alias_ptr_type
6466 (DR_REF (first_dr
)), 0));
6467 vec_dest
= vect_create_destination_var (scalar_dest
,
6469 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6470 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6471 gimple_assign_set_lhs (new_stmt
, new_temp
);
6472 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6473 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6474 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6477 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6478 TYPE_SIZE_UNIT (elem_type
));
6479 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6480 new_stmt
= gimple_build_assign
6481 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
6484 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6485 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6486 gimple_assign_set_lhs (new_stmt
, ptr
);
6487 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6489 = build2 (MEM_REF
, vectype
, ptr
,
6490 build_int_cst (reference_alias_ptr_type
6491 (DR_REF (first_dr
)), 0));
6494 case dr_explicit_realign_optimized
:
6495 new_temp
= copy_ssa_name (dataref_ptr
);
6496 new_stmt
= gimple_build_assign
6497 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
6499 (TREE_TYPE (dataref_ptr
),
6500 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6501 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6503 = build2 (MEM_REF
, vectype
, new_temp
,
6504 build_int_cst (reference_alias_ptr_type
6505 (DR_REF (first_dr
)), 0));
6510 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6511 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6512 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6513 gimple_assign_set_lhs (new_stmt
, new_temp
);
6514 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6516 /* 3. Handle explicit realignment if necessary/supported.
6518 vec_dest = realign_load (msq, lsq, realignment_token) */
6519 if (alignment_support_scheme
== dr_explicit_realign_optimized
6520 || alignment_support_scheme
== dr_explicit_realign
)
6522 lsq
= gimple_assign_lhs (new_stmt
);
6523 if (!realignment_token
)
6524 realignment_token
= dataref_ptr
;
6525 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6526 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
6527 msq
, lsq
, realignment_token
);
6528 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6529 gimple_assign_set_lhs (new_stmt
, new_temp
);
6530 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6532 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6535 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6536 add_phi_arg (phi
, lsq
,
6537 loop_latch_edge (containing_loop
),
6543 /* 4. Handle invariant-load. */
6544 if (inv_p
&& !bb_vinfo
)
6546 gcc_assert (!grouped_load
);
6547 /* If we have versioned for aliasing or the loop doesn't
6548 have any data dependencies that would preclude this,
6549 then we are sure this is a loop invariant load and
6550 thus we can insert it on the preheader edge. */
6551 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6552 && !nested_in_vect_loop
6553 && hoist_defs_of_uses (stmt
, loop
))
6555 if (dump_enabled_p ())
6557 dump_printf_loc (MSG_NOTE
, vect_location
,
6558 "hoisting out of the vectorized "
6560 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6561 dump_printf (MSG_NOTE
, "\n");
6563 tree tem
= copy_ssa_name (scalar_dest
);
6564 gsi_insert_on_edge_immediate
6565 (loop_preheader_edge (loop
),
6566 gimple_build_assign (tem
,
6568 (gimple_assign_rhs1 (stmt
))));
6569 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6573 gimple_stmt_iterator gsi2
= *gsi
;
6575 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6578 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6579 set_vinfo_for_stmt (new_stmt
,
6580 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6586 tree perm_mask
= perm_mask_for_reverse (vectype
);
6587 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6588 perm_mask
, stmt
, gsi
);
6589 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6592 /* Collect vector loads and later create their permutation in
6593 vect_transform_grouped_load (). */
6594 if (grouped_load
|| slp_perm
)
6595 dr_chain
.quick_push (new_temp
);
6597 /* Store vector loads in the corresponding SLP_NODE. */
6598 if (slp
&& !slp_perm
)
6599 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6601 /* Bump the vector pointer to account for a gap. */
6602 if (slp
&& group_gap
!= 0)
6604 tree bump
= size_binop (MULT_EXPR
,
6605 TYPE_SIZE_UNIT (elem_type
),
6606 size_int (group_gap
));
6607 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6612 if (slp
&& !slp_perm
)
6617 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6618 slp_node_instance
, false))
6620 dr_chain
.release ();
6629 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6630 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6635 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6637 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6638 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6641 dr_chain
.release ();
6647 /* Function vect_is_simple_cond.
6650 LOOP - the loop that is being vectorized.
6651 COND - Condition that is checked for simple use.
6654 *COMP_VECTYPE - the vector type for the comparison.
6656 Returns whether a COND can be vectorized. Checks whether
6657 condition operands are supportable using vec_is_simple_use. */
/* Check that COND is a simple comparison whose two operands are each
   either an SSA_NAME with a vectorizable ("simple use") definition or
   a literal constant (integer, real, or fixed-point).  On success the
   vector type of the comparison is recorded in *COMP_VECTYPE.
   NOTE(review): this chunk is a line-shredded extraction; several
   original lines (braces, "return false;" statements) are missing, so
   the failure paths below are implied rather than visible -- verify
   against the complete file.  */
6660 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6661 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
/* DT is only a scratch output for vect_is_simple_use_1; its value is
   not inspected here.  */
6665 enum vect_def_type dt
;
/* Vector types discovered for the two comparison operands; either may
   stay NULL_TREE when the corresponding operand is a constant.  */
6666 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Only comparison-class trees (EQ_EXPR, LT_EXPR, ...) are handled.  */
6668 if (!COMPARISON_CLASS_P (cond
))
6671 lhs
= TREE_OPERAND (cond
, 0);
6672 rhs
= TREE_OPERAND (cond
, 1);
/* LHS: an SSA name must have a simple vectorizable use (this also
   yields its vector type in VECTYPE1) ...  */
6674 if (TREE_CODE (lhs
) == SSA_NAME
)
6676 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6677 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6678 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
/* ... otherwise it must be a numeric literal.  */
6681 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6682 && TREE_CODE (lhs
) != FIXED_CST
)
/* RHS: same admission test as the LHS, vector type goes to VECTYPE2.  */
6685 if (TREE_CODE (rhs
) == SSA_NAME
)
6687 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6688 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6689 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6692 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6693 && TREE_CODE (rhs
) != FIXED_CST
)
/* Prefer the LHS vector type; fall back to the RHS one.  Both may be
   NULL_TREE if both operands were constants.  */
6696 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6700 /* vectorizable_condition.
6702 Check if STMT is conditional modify expression that can be vectorized.
6703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6704 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6707 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6708 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6709 else clause if it is 2).
6711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (and, when VEC_STMT is non-null, transform) a COND_EXPR
   assignment STMT into a vector VEC_COND_EXPR.  REDUC_DEF/REDUC_INDEX
   splice a reduction variable into the then (index 1) or else
   (index 2) clause.  Returns false when STMT is not vectorizable.
   NOTE(review): line-shredded extraction; braces, "return false;"
   statements and some declarations were dropped -- the control flow
   documented below is reconstructed from context, confirm against the
   complete file.  */
6714 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6715 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6718 tree scalar_dest
= NULL_TREE
;
6719 tree vec_dest
= NULL_TREE
;
6720 tree cond_expr
, then_clause
, else_clause
;
6721 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6722 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6723 tree comp_vectype
= NULL_TREE
;
/* Per-copy vector defs for the four COND_EXPR operands.  */
6724 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6725 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6726 tree vec_compare
, vec_cond_expr
;
6728 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* DTS[0..3] record the def types of cond-lhs, cond-rhs, then, else;
   used to fetch defs for later unrolled copies.  */
6730 enum vect_def_type dt
, dts
[4];
6731 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6733 enum tree_code code
;
6734 stmt_vec_info prev_stmt_info
= NULL
;
6736 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6737 vec
<tree
> vec_oprnds0
= vNULL
;
6738 vec
<tree
> vec_oprnds1
= vNULL
;
6739 vec
<tree
> vec_oprnds2
= vNULL
;
6740 vec
<tree
> vec_oprnds3
= vNULL
;
/* NCOPIES: number of vector stmts needed (1 for SLP, else VF/nunits).  */
6743 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6746 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6748 gcc_assert (ncopies
>= 1);
/* Reductions with multiple copies or inside SLP are not supported.  */
6749 if (reduc_index
&& ncopies
> 1)
6750 return false; /* FORNOW */
6752 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
/* Skip stmts that are not relevant (loop case) -- in BB vectorization
   everything analyzed is relevant.  */
6755 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6758 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6759 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6763 /* FORNOW: not yet supported. */
6764 if (STMT_VINFO_LIVE_P (stmt_info
))
6766 if (dump_enabled_p ())
6767 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6768 "value used after loop.\n");
6772 /* Is vectorizable conditional operation? */
6773 if (!is_gimple_assign (stmt
))
6776 code
= gimple_assign_rhs_code (stmt
);
6778 if (code
!= COND_EXPR
)
/* Decompose STMT: lhs = cond_expr ? then_clause : else_clause.  */
6781 cond_expr
= gimple_assign_rhs1 (stmt
);
6782 then_clause
= gimple_assign_rhs2 (stmt
);
6783 else_clause
= gimple_assign_rhs3 (stmt
);
/* The condition must be a simple comparison (see vect_is_simple_cond);
   this also computes COMP_VECTYPE.  */
6785 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
/* THEN clause: SSA name with a simple use, or a numeric literal.  */
6790 if (TREE_CODE (then_clause
) == SSA_NAME
)
6792 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6793 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6794 &then_def_stmt
, &def
, &dt
))
6797 else if (TREE_CODE (then_clause
) != INTEGER_CST
6798 && TREE_CODE (then_clause
) != REAL_CST
6799 && TREE_CODE (then_clause
) != FIXED_CST
)
/* ELSE clause: same admission test.  */
6802 if (TREE_CODE (else_clause
) == SSA_NAME
)
6804 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6805 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6806 &else_def_stmt
, &def
, &dt
))
6809 else if (TREE_CODE (else_clause
) != INTEGER_CST
6810 && TREE_CODE (else_clause
) != REAL_CST
6811 && TREE_CODE (else_clause
) != FIXED_CST
)
/* Build a signed integer vector type of the same width as VECTYPE to
   hold the boolean comparison result.  */
6814 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
6815 /* The result of a vector comparison should be signed type. */
6816 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
6817 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
6818 if (vec_cmp_type
== NULL_TREE
)
/* Analysis-only exit: record the stmt kind and ask the target whether
   it can expand a VEC_COND_EXPR for these types.  */
6823 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6824 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
/* Transformation phase starts here.  */
6831 vec_oprnds0
.create (1);
6832 vec_oprnds1
.create (1);
6833 vec_oprnds2
.create (1);
6834 vec_oprnds3
.create (1);
6838 scalar_dest
= gimple_assign_lhs (stmt
);
6839 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6841 /* Handle cond expr. */
6842 for (j
= 0; j
< ncopies
; j
++)
6844 gassign
*new_stmt
= NULL
;
/* SLP path (presumably j == 0 only -- dropped guard lines): gather
   vector defs for all four operands at once.  */
6849 auto_vec
<tree
, 4> ops
;
6850 auto_vec
<vec
<tree
>, 4> vec_defs
;
6852 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
6853 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
6854 ops
.safe_push (then_clause
);
6855 ops
.safe_push (else_clause
);
6856 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
/* vect_get_slp_defs pushes in operand order, so pop in reverse.  */
6857 vec_oprnds3
= vec_defs
.pop ();
6858 vec_oprnds2
= vec_defs
.pop ();
6859 vec_oprnds1
= vec_defs
.pop ();
6860 vec_oprnds0
= vec_defs
.pop ();
6863 vec_defs
.release ();
/* Non-SLP, first copy (j == 0): fetch the initial vector def of each
   operand; the vect_is_simple_use calls refill DTS[] for later copies.
   NOTE(review): ">emp" below is a garbled "&gtemp" from the original
   (HTML-escape damage) -- left byte-identical on purpose.  */
6869 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
6871 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
6872 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
6875 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
6877 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
6878 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
/* In a reduction, one clause is replaced by the reduction def.  */
6879 if (reduc_index
== 1)
6880 vec_then_clause
= reduc_def
;
6883 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
6885 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
6886 NULL
, >emp
, &def
, &dts
[2]);
6888 if (reduc_index
== 2)
6889 vec_else_clause
= reduc_def
;
6892 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
6894 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
6895 NULL
, >emp
, &def
, &dts
[3]);
/* Non-SLP, later copies (j > 0): chain defs from the previous copy.  */
6901 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
6902 vec_oprnds0
.pop ());
6903 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
6904 vec_oprnds1
.pop ());
6905 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
6906 vec_oprnds2
.pop ());
6907 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
6908 vec_oprnds3
.pop ());
/* (Non-SLP) stash this copy's defs so the loop body below can treat
   SLP and non-SLP uniformly.  */
6913 vec_oprnds0
.quick_push (vec_cond_lhs
);
6914 vec_oprnds1
.quick_push (vec_cond_rhs
);
6915 vec_oprnds2
.quick_push (vec_then_clause
);
6916 vec_oprnds3
.quick_push (vec_else_clause
);
6919 /* Arguments are ready. Create the new vector stmt. */
6920 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
6922 vec_cond_rhs
= vec_oprnds1
[i
];
6923 vec_then_clause
= vec_oprnds2
[i
];
6924 vec_else_clause
= vec_oprnds3
[i
];
/* vec_dest = (lhs <code> rhs) ? then : else, as a VEC_COND_EXPR.  */
6926 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
6927 vec_cond_lhs
, vec_cond_rhs
);
6928 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6929 vec_compare
, vec_then_clause
, vec_else_clause
);
6931 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6932 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6933 gimple_assign_set_lhs (new_stmt
, new_temp
);
6934 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* SLP: record the vector stmt in the SLP node.  */
6936 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Non-SLP: chain the copies via RELATED_STMT; the first copy is also
   stored as the stmt's VEC_STMT.  */
6943 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6945 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6947 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the operand vectors (safe on vNULL).  */
6950 vec_oprnds0
.release ();
6951 vec_oprnds1
.release ();
6952 vec_oprnds2
.release ();
6953 vec_oprnds3
.release ();
6959 /* Make sure the statement is vectorizable. */
6962 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
6964 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6965 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6966 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
6968 tree scalar_type
, vectype
;
6969 gimple pattern_stmt
;
6970 gimple_seq pattern_def_seq
;
6972 if (dump_enabled_p ())
6974 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
6975 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6976 dump_printf (MSG_NOTE
, "\n");
6979 if (gimple_has_volatile_ops (stmt
))
6981 if (dump_enabled_p ())
6982 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6983 "not vectorized: stmt has volatile operands\n");
6988 /* Skip stmts that do not need to be vectorized. In loops this is expected
6990 - the COND_EXPR which is the loop exit condition
6991 - any LABEL_EXPRs in the loop
6992 - computations that are used only for array indexing or loop control.
6993 In basic blocks we only analyze statements that are a part of some SLP
6994 instance, therefore, all the statements are relevant.
6996 Pattern statement needs to be analyzed instead of the original statement
6997 if the original statement is not relevant. Otherwise, we analyze both
6998 statements. In basic blocks we are called from some SLP instance
6999 traversal, don't analyze pattern stmts instead, the pattern stmts
7000 already will be part of SLP instance. */
7002 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7003 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7004 && !STMT_VINFO_LIVE_P (stmt_info
))
7006 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7008 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7009 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7011 /* Analyze PATTERN_STMT instead of the original stmt. */
7012 stmt
= pattern_stmt
;
7013 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7014 if (dump_enabled_p ())
7016 dump_printf_loc (MSG_NOTE
, vect_location
,
7017 "==> examining pattern statement: ");
7018 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7019 dump_printf (MSG_NOTE
, "\n");
7024 if (dump_enabled_p ())
7025 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7030 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7033 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7034 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7036 /* Analyze PATTERN_STMT too. */
7037 if (dump_enabled_p ())
7039 dump_printf_loc (MSG_NOTE
, vect_location
,
7040 "==> examining pattern statement: ");
7041 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7042 dump_printf (MSG_NOTE
, "\n");
7045 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7049 if (is_pattern_stmt_p (stmt_info
)
7051 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7053 gimple_stmt_iterator si
;
7055 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7057 gimple pattern_def_stmt
= gsi_stmt (si
);
7058 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7059 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7061 /* Analyze def stmt of STMT if it's a pattern stmt. */
7062 if (dump_enabled_p ())
7064 dump_printf_loc (MSG_NOTE
, vect_location
,
7065 "==> examining pattern def statement: ");
7066 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7067 dump_printf (MSG_NOTE
, "\n");
7070 if (!vect_analyze_stmt (pattern_def_stmt
,
7071 need_to_vectorize
, node
))
7077 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7079 case vect_internal_def
:
7082 case vect_reduction_def
:
7083 case vect_nested_cycle
:
7084 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7085 || relevance
== vect_used_in_outer_by_reduction
7086 || relevance
== vect_unused_in_scope
));
7089 case vect_induction_def
:
7090 case vect_constant_def
:
7091 case vect_external_def
:
7092 case vect_unknown_def_type
:
7099 gcc_assert (PURE_SLP_STMT (stmt_info
));
7101 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7102 if (dump_enabled_p ())
7104 dump_printf_loc (MSG_NOTE
, vect_location
,
7105 "get vectype for scalar type: ");
7106 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7107 dump_printf (MSG_NOTE
, "\n");
7110 vectype
= get_vectype_for_scalar_type (scalar_type
);
7113 if (dump_enabled_p ())
7115 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7116 "not SLPed: unsupported data-type ");
7117 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7119 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7124 if (dump_enabled_p ())
7126 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7127 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7128 dump_printf (MSG_NOTE
, "\n");
7131 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7134 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7136 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7137 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7138 || (is_gimple_call (stmt
)
7139 && gimple_call_lhs (stmt
) == NULL_TREE
));
7140 *need_to_vectorize
= true;
7145 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7146 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7147 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7148 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7149 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7150 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7151 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7152 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7153 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7154 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7155 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7156 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7160 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7161 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7162 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7163 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7164 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7165 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7166 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7167 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7168 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7173 if (dump_enabled_p ())
7175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7176 "not vectorized: relevant stmt not ");
7177 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7178 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7179 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7188 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7189 need extra handling, except for vectorizable reductions. */
7190 if (STMT_VINFO_LIVE_P (stmt_info
)
7191 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7192 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7196 if (dump_enabled_p ())
7198 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7199 "not vectorized: live stmt not ");
7200 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7201 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7202 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7212 /* Function vect_transform_stmt.
7214 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7217 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7218 bool *grouped_store
, slp_tree slp_node
,
7219 slp_instance slp_node_instance
)
7221 bool is_store
= false;
7222 gimple vec_stmt
= NULL
;
7223 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7226 switch (STMT_VINFO_TYPE (stmt_info
))
7228 case type_demotion_vec_info_type
:
7229 case type_promotion_vec_info_type
:
7230 case type_conversion_vec_info_type
:
7231 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7235 case induc_vec_info_type
:
7236 gcc_assert (!slp_node
);
7237 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7241 case shift_vec_info_type
:
7242 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7246 case op_vec_info_type
:
7247 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7251 case assignment_vec_info_type
:
7252 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7256 case load_vec_info_type
:
7257 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7262 case store_vec_info_type
:
7263 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7265 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7267 /* In case of interleaving, the whole chain is vectorized when the
7268 last store in the chain is reached. Store stmts before the last
7269 one are skipped, and there vec_stmt_info shouldn't be freed
7271 *grouped_store
= true;
7272 if (STMT_VINFO_VEC_STMT (stmt_info
))
7279 case condition_vec_info_type
:
7280 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7284 case call_vec_info_type
:
7285 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7286 stmt
= gsi_stmt (*gsi
);
7287 if (is_gimple_call (stmt
)
7288 && gimple_call_internal_p (stmt
)
7289 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7293 case call_simd_clone_vec_info_type
:
7294 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7295 stmt
= gsi_stmt (*gsi
);
7298 case reduc_vec_info_type
:
7299 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7304 if (!STMT_VINFO_LIVE_P (stmt_info
))
7306 if (dump_enabled_p ())
7307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7308 "stmt not supported.\n");
7313 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7314 is being vectorized, but outside the immediately enclosing loop. */
7316 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7317 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7318 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7319 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7320 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7321 || STMT_VINFO_RELEVANT (stmt_info
) ==
7322 vect_used_in_outer_by_reduction
))
7324 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7325 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7326 imm_use_iterator imm_iter
;
7327 use_operand_p use_p
;
7331 if (dump_enabled_p ())
7332 dump_printf_loc (MSG_NOTE
, vect_location
,
7333 "Record the vdef for outer-loop vectorization.\n");
7335 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7336 (to be used when vectorizing outer-loop stmts that use the DEF of
7338 if (gimple_code (stmt
) == GIMPLE_PHI
)
7339 scalar_dest
= PHI_RESULT (stmt
);
7341 scalar_dest
= gimple_assign_lhs (stmt
);
7343 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7345 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7347 exit_phi
= USE_STMT (use_p
);
7348 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7353 /* Handle stmts whose DEF is used outside the loop-nest that is
7354 being vectorized. */
7355 if (STMT_VINFO_LIVE_P (stmt_info
)
7356 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7358 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7363 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7369 /* Remove a group of stores (for SLP or interleaving), free their
7373 vect_remove_stores (gimple first_stmt
)
7375 gimple next
= first_stmt
;
7377 gimple_stmt_iterator next_si
;
7381 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7383 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7384 if (is_pattern_stmt_p (stmt_info
))
7385 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7386 /* Free the attached stmt_vec_info and remove the stmt. */
7387 next_si
= gsi_for_stmt (next
);
7388 unlink_stmt_vdef (next
);
7389 gsi_remove (&next_si
, true);
7390 release_defs (next
);
7391 free_stmt_vec_info (next
);
7397 /* Function new_stmt_vec_info.
7399 Create and initialize a new stmt_vec_info struct for STMT. */
7402 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7403 bb_vec_info bb_vinfo
)
7406 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7408 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7409 STMT_VINFO_STMT (res
) = stmt
;
7410 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7411 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7412 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7413 STMT_VINFO_LIVE_P (res
) = false;
7414 STMT_VINFO_VECTYPE (res
) = NULL
;
7415 STMT_VINFO_VEC_STMT (res
) = NULL
;
7416 STMT_VINFO_VECTORIZABLE (res
) = true;
7417 STMT_VINFO_IN_PATTERN_P (res
) = false;
7418 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7419 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7420 STMT_VINFO_DATA_REF (res
) = NULL
;
7422 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7423 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7424 STMT_VINFO_DR_INIT (res
) = NULL
;
7425 STMT_VINFO_DR_STEP (res
) = NULL
;
7426 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7428 if (gimple_code (stmt
) == GIMPLE_PHI
7429 && is_loop_header_bb_p (gimple_bb (stmt
)))
7430 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7432 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7434 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7435 STMT_SLP_TYPE (res
) = loop_vect
;
7436 GROUP_FIRST_ELEMENT (res
) = NULL
;
7437 GROUP_NEXT_ELEMENT (res
) = NULL
;
7438 GROUP_SIZE (res
) = 0;
7439 GROUP_STORE_COUNT (res
) = 0;
7440 GROUP_GAP (res
) = 0;
7441 GROUP_SAME_DR_STMT (res
) = NULL
;
7447 /* Create a hash table for stmt_vec_info. */
7450 init_stmt_vec_info_vec (void)
7452 gcc_assert (!stmt_vec_info_vec
.exists ());
7453 stmt_vec_info_vec
.create (50);
7457 /* Free hash table for stmt_vec_info. */
7460 free_stmt_vec_info_vec (void)
7464 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7466 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7467 gcc_assert (stmt_vec_info_vec
.exists ());
7468 stmt_vec_info_vec
.release ();
7472 /* Free stmt vectorization related info. */
7475 free_stmt_vec_info (gimple stmt
)
7477 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7482 /* Check if this statement has a related "pattern stmt"
7483 (introduced by the vectorizer during the pattern recognition
7484 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7486 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7488 stmt_vec_info patt_info
7489 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7492 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7493 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7494 gimple_set_bb (patt_stmt
, NULL
);
7495 tree lhs
= gimple_get_lhs (patt_stmt
);
7496 if (TREE_CODE (lhs
) == SSA_NAME
)
7497 release_ssa_name (lhs
);
7500 gimple_stmt_iterator si
;
7501 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7503 gimple seq_stmt
= gsi_stmt (si
);
7504 gimple_set_bb (seq_stmt
, NULL
);
7505 lhs
= gimple_get_lhs (patt_stmt
);
7506 if (TREE_CODE (lhs
) == SSA_NAME
)
7507 release_ssa_name (lhs
);
7508 free_stmt_vec_info (seq_stmt
);
7511 free_stmt_vec_info (patt_stmt
);
7515 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7516 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
7517 set_vinfo_for_stmt (stmt
, NULL
);
7522 /* Function get_vectype_for_scalar_type_and_size.
7524 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7528 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7530 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7531 machine_mode simd_mode
;
7532 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7539 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7540 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7543 /* For vector types of elements whose mode precision doesn't
7544 match their types precision we use a element type of mode
7545 precision. The vectorization routines will have to make sure
7546 they support the proper result truncation/extension.
7547 We also make sure to build vector types with INTEGER_TYPE
7548 component type only. */
7549 if (INTEGRAL_TYPE_P (scalar_type
)
7550 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7551 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7552 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7553 TYPE_UNSIGNED (scalar_type
));
7555 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7556 When the component mode passes the above test simply use a type
7557 corresponding to that mode. The theory is that any use that
7558 would cause problems with this will disable vectorization anyway. */
7559 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7560 && !INTEGRAL_TYPE_P (scalar_type
))
7561 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7563 /* We can't build a vector type of elements with alignment bigger than
7565 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7566 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7567 TYPE_UNSIGNED (scalar_type
));
7569 /* If we felt back to using the mode fail if there was
7570 no scalar type for it. */
7571 if (scalar_type
== NULL_TREE
)
7574 /* If no size was supplied use the mode the target prefers. Otherwise
7575 lookup a vector mode of the specified size. */
7577 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7579 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7580 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7584 vectype
= build_vector_type (scalar_type
, nunits
);
7586 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7587 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7593 unsigned int current_vector_size
;
7595 /* Function get_vectype_for_scalar_type.
7597 Returns the vector type corresponding to SCALAR_TYPE as supported
7601 get_vectype_for_scalar_type (tree scalar_type
)
7604 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7605 current_vector_size
);
7607 && current_vector_size
== 0)
7608 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7612 /* Function get_same_sized_vectype
7614 Returns a vector type corresponding to SCALAR_TYPE of size
7615 VECTOR_TYPE if supported by the target. */
7618 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7620 return get_vectype_for_scalar_type_and_size
7621 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7624 /* Function vect_is_simple_use.
7627 LOOP_VINFO - the vect info of the loop that is being vectorized.
7628 BB_VINFO - the vect info of the basic block that is being vectorized.
7629 OPERAND - operand of STMT in the loop or bb.
7630 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7632 Returns whether a stmt with OPERAND can be vectorized.
7633 For loops, supportable operands are constants, loop invariants, and operands
7634 that are defined by the current iteration of the loop. Unsupportable
7635 operands are those that are defined by a previous iteration of the loop (as
7636 is the case in reduction/induction computations).
7637 For basic blocks, supportable operands are constants and bb invariants.
7638 For now, operands defined outside the basic block are not supported. */
7641 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7642 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7643 tree
*def
, enum vect_def_type
*dt
)
7646 stmt_vec_info stmt_vinfo
;
7647 struct loop
*loop
= NULL
;
7650 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7655 if (dump_enabled_p ())
7657 dump_printf_loc (MSG_NOTE
, vect_location
,
7658 "vect_is_simple_use: operand ");
7659 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7660 dump_printf (MSG_NOTE
, "\n");
7663 if (CONSTANT_CLASS_P (operand
))
7665 *dt
= vect_constant_def
;
7669 if (is_gimple_min_invariant (operand
))
7672 *dt
= vect_external_def
;
7676 if (TREE_CODE (operand
) == PAREN_EXPR
)
7678 if (dump_enabled_p ())
7679 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7680 operand
= TREE_OPERAND (operand
, 0);
7683 if (TREE_CODE (operand
) != SSA_NAME
)
7685 if (dump_enabled_p ())
7686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7691 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7692 if (*def_stmt
== NULL
)
7694 if (dump_enabled_p ())
7695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7700 if (dump_enabled_p ())
7702 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7703 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7704 dump_printf (MSG_NOTE
, "\n");
7707 /* Empty stmt is expected only in case of a function argument.
7708 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7709 if (gimple_nop_p (*def_stmt
))
7712 *dt
= vect_external_def
;
7716 bb
= gimple_bb (*def_stmt
);
7718 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7719 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7720 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7721 *dt
= vect_external_def
;
7724 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7725 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7728 if (*dt
== vect_unknown_def_type
7730 && *dt
== vect_double_reduction_def
7731 && gimple_code (stmt
) != GIMPLE_PHI
))
7733 if (dump_enabled_p ())
7734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7735 "Unsupported pattern.\n");
7739 if (dump_enabled_p ())
7740 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
7742 switch (gimple_code (*def_stmt
))
7745 *def
= gimple_phi_result (*def_stmt
);
7749 *def
= gimple_assign_lhs (*def_stmt
);
7753 *def
= gimple_call_lhs (*def_stmt
);
7758 if (dump_enabled_p ())
7759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7760 "unsupported defining stmt:\n");
7767 /* Function vect_is_simple_use_1.
7769 Same as vect_is_simple_use_1 but also determines the vector operand
7770 type of OPERAND and stores it to *VECTYPE. If the definition of
7771 OPERAND is vect_uninitialized_def, vect_constant_def or
7772 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7773 is responsible to compute the best suited vector type for the
7777 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7778 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7779 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
7781 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
7785 /* Now get a vector type if the def is internal, otherwise supply
7786 NULL_TREE and leave it up to the caller to figure out a proper
7787 type for the use stmt. */
7788 if (*dt
== vect_internal_def
7789 || *dt
== vect_induction_def
7790 || *dt
== vect_reduction_def
7791 || *dt
== vect_double_reduction_def
7792 || *dt
== vect_nested_cycle
)
7794 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
7796 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7797 && !STMT_VINFO_RELEVANT (stmt_info
)
7798 && !STMT_VINFO_LIVE_P (stmt_info
))
7799 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7801 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7802 gcc_assert (*vectype
!= NULL_TREE
);
7804 else if (*dt
== vect_uninitialized_def
7805 || *dt
== vect_constant_def
7806 || *dt
== vect_external_def
)
7807 *vectype
= NULL_TREE
;
7815 /* Function supportable_widening_operation
7817 Check whether an operation represented by the code CODE is a
7818 widening operation that is supported by the target platform in
7819 vector form (i.e., when operating on arguments of type VECTYPE_IN
7820 producing a result of type VECTYPE_OUT).
7822 Widening operations we currently support are NOP (CONVERT), FLOAT
7823 and WIDEN_MULT. This function checks if these operations are supported
7824 by the target platform either directly (via vector tree-codes), or via
7828 - CODE1 and CODE2 are codes of vector operations to be used when
7829 vectorizing the operation, if available.
7830 - MULTI_STEP_CVT determines the number of required intermediate steps in
7831 case of multi-step conversion (like char->short->int - in that case
7832 MULTI_STEP_CVT will be 1).
7833 - INTERM_TYPES contains the intermediate type required to perform the
7834 widening operation (short in the above example). */
7837 supportable_widening_operation (enum tree_code code
, gimple stmt
,
7838 tree vectype_out
, tree vectype_in
,
7839 enum tree_code
*code1
, enum tree_code
*code2
,
7840 int *multi_step_cvt
,
7841 vec
<tree
> *interm_types
)
7843 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7844 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7845 struct loop
*vect_loop
= NULL
;
7846 machine_mode vec_mode
;
7847 enum insn_code icode1
, icode2
;
7848 optab optab1
, optab2
;
7849 tree vectype
= vectype_in
;
7850 tree wide_vectype
= vectype_out
;
7851 enum tree_code c1
, c2
;
7853 tree prev_type
, intermediate_type
;
7854 machine_mode intermediate_mode
, prev_mode
;
7855 optab optab3
, optab4
;
7857 *multi_step_cvt
= 0;
7859 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
7863 case WIDEN_MULT_EXPR
:
7864 /* The result of a vectorized widening operation usually requires
7865 two vectors (because the widened results do not fit into one vector).
7866 The generated vector results would normally be expected to be
7867 generated in the same order as in the original scalar computation,
7868 i.e. if 8 results are generated in each vector iteration, they are
7869 to be organized as follows:
7870 vect1: [res1,res2,res3,res4],
7871 vect2: [res5,res6,res7,res8].
7873 However, in the special case that the result of the widening
7874 operation is used in a reduction computation only, the order doesn't
7875 matter (because when vectorizing a reduction we change the order of
7876 the computation). Some targets can take advantage of this and
7877 generate more efficient code. For example, targets like Altivec,
7878 that support widen_mult using a sequence of {mult_even,mult_odd}
7879 generate the following vectors:
7880 vect1: [res1,res3,res5,res7],
7881 vect2: [res2,res4,res6,res8].
7883 When vectorizing outer-loops, we execute the inner-loop sequentially
7884 (each vectorized inner-loop iteration contributes to VF outer-loop
7885 iterations in parallel). We therefore don't allow to change the
7886 order of the computation in the inner-loop during outer-loop
7888 /* TODO: Another case in which order doesn't *really* matter is when we
7889 widen and then contract again, e.g. (short)((int)x * y >> 8).
7890 Normally, pack_trunc performs an even/odd permute, whereas the
7891 repack from an even/odd expansion would be an interleave, which
7892 would be significantly simpler for e.g. AVX2. */
7893 /* In any case, in order to avoid duplicating the code below, recurse
7894 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7895 are properly set up for the caller. If we fail, we'll continue with
7896 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7898 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
7899 && !nested_in_vect_loop_p (vect_loop
, stmt
)
7900 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
7901 stmt
, vectype_out
, vectype_in
,
7902 code1
, code2
, multi_step_cvt
,
7905 /* Elements in a vector with vect_used_by_reduction property cannot
7906 be reordered if the use chain with this property does not have the
7907 same operation. One such an example is s += a * b, where elements
7908 in a and b cannot be reordered. Here we check if the vector defined
7909 by STMT is only directly used in the reduction statement. */
7910 tree lhs
= gimple_assign_lhs (stmt
);
7911 use_operand_p dummy
;
7913 stmt_vec_info use_stmt_info
= NULL
;
7914 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
7915 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
7916 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
7919 c1
= VEC_WIDEN_MULT_LO_EXPR
;
7920 c2
= VEC_WIDEN_MULT_HI_EXPR
;
7923 case VEC_WIDEN_MULT_EVEN_EXPR
:
7924 /* Support the recursion induced just above. */
7925 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
7926 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
7929 case WIDEN_LSHIFT_EXPR
:
7930 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
7931 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
7935 c1
= VEC_UNPACK_LO_EXPR
;
7936 c2
= VEC_UNPACK_HI_EXPR
;
7940 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
7941 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
7944 case FIX_TRUNC_EXPR
:
7945 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7946 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7947 computing the operation. */
7954 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
7956 enum tree_code ctmp
= c1
;
7961 if (code
== FIX_TRUNC_EXPR
)
7963 /* The signedness is determined from output operand. */
7964 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7965 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
7969 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7970 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
7973 if (!optab1
|| !optab2
)
7976 vec_mode
= TYPE_MODE (vectype
);
7977 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
7978 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
7984 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7985 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7988 /* Check if it's a multi-step conversion that can be done using intermediate
7991 prev_type
= vectype
;
7992 prev_mode
= vec_mode
;
7994 if (!CONVERT_EXPR_CODE_P (code
))
7997 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7998 intermediate steps in promotion sequence. We try
7999 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8001 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8002 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8004 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8006 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8007 TYPE_UNSIGNED (prev_type
));
8008 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8009 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8011 if (!optab3
|| !optab4
8012 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8013 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8014 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8015 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8016 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8017 == CODE_FOR_nothing
)
8018 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
8019 == CODE_FOR_nothing
))
8022 interm_types
->quick_push (intermediate_type
);
8023 (*multi_step_cvt
)++;
8025 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8026 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8029 prev_type
= intermediate_type
;
8030 prev_mode
= intermediate_mode
;
8033 interm_types
->release ();
8038 /* Function supportable_narrowing_operation
8040 Check whether an operation represented by the code CODE is a
8041 narrowing operation that is supported by the target platform in
8042 vector form (i.e., when operating on arguments of type VECTYPE_IN
8043 and producing a result of type VECTYPE_OUT).
8045 Narrowing operations we currently support are NOP (CONVERT) and
8046 FIX_TRUNC. This function checks if these operations are supported by
8047 the target platform directly via vector tree-codes.
8050 - CODE1 is the code of a vector operation to be used when
8051 vectorizing the operation, if available.
8052 - MULTI_STEP_CVT determines the number of required intermediate steps in
8053 case of multi-step conversion (like int->short->char - in that case
8054 MULTI_STEP_CVT will be 1).
8055 - INTERM_TYPES contains the intermediate type required to perform the
8056 narrowing operation (short in the above example). */
8059 supportable_narrowing_operation (enum tree_code code
,
8060 tree vectype_out
, tree vectype_in
,
8061 enum tree_code
*code1
, int *multi_step_cvt
,
8062 vec
<tree
> *interm_types
)
8064 machine_mode vec_mode
;
8065 enum insn_code icode1
;
8066 optab optab1
, interm_optab
;
8067 tree vectype
= vectype_in
;
8068 tree narrow_vectype
= vectype_out
;
8070 tree intermediate_type
;
8071 machine_mode intermediate_mode
, prev_mode
;
8075 *multi_step_cvt
= 0;
8079 c1
= VEC_PACK_TRUNC_EXPR
;
8082 case FIX_TRUNC_EXPR
:
8083 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8087 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8088 tree code and optabs used for computing the operation. */
8095 if (code
== FIX_TRUNC_EXPR
)
8096 /* The signedness is determined from output operand. */
8097 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8099 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8104 vec_mode
= TYPE_MODE (vectype
);
8105 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8110 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8113 /* Check if it's a multi-step conversion that can be done using intermediate
8115 prev_mode
= vec_mode
;
8116 if (code
== FIX_TRUNC_EXPR
)
8117 uns
= TYPE_UNSIGNED (vectype_out
);
8119 uns
= TYPE_UNSIGNED (vectype
);
8121 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8122 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8123 costly than signed. */
8124 if (code
== FIX_TRUNC_EXPR
&& uns
)
8126 enum insn_code icode2
;
8129 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8131 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8132 if (interm_optab
!= unknown_optab
8133 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8134 && insn_data
[icode1
].operand
[0].mode
8135 == insn_data
[icode2
].operand
[0].mode
)
8138 optab1
= interm_optab
;
8143 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8144 intermediate steps in promotion sequence. We try
8145 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8146 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8147 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8149 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8151 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8153 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8156 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8157 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8158 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8159 == CODE_FOR_nothing
))
8162 interm_types
->quick_push (intermediate_type
);
8163 (*multi_step_cvt
)++;
8165 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8168 prev_mode
= intermediate_mode
;
8169 optab1
= interm_optab
;
8172 interm_types
->release ();