/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
33 #include "gimple-ssa.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop-manip.h"
41 #include "recog.h" /* FIXME: for insn_data */
43 #include "diagnostic-core.h"
44 #include "tree-vectorizer.h"
47 /* For lang_hooks.types.type_for_mode. */
48 #include "langhooks.h"
50 /* Return the vectorized type for the given statement. */
53 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
55 return STMT_VINFO_VECTYPE (stmt_info
);
58 /* Return TRUE iff the given statement is in an inner loop relative to
59 the loop being vectorized. */
61 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
63 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
64 basic_block bb
= gimple_bb (stmt
);
65 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
71 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
73 return (bb
->loop_father
== loop
->inner
);
76 /* Record the cost of a statement, either by directly informing the
77 target model or by saving it in a vector for later processing.
78 Return a preliminary estimate of the statement's cost. */
81 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
82 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
83 int misalign
, enum vect_cost_model_location where
)
87 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
88 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
89 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
92 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
97 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
98 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
99 void *target_cost_data
;
102 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
104 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
106 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
116 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
126 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
127 tree array
, unsigned HOST_WIDE_INT n
)
129 tree vect_type
, vect
, vect_name
, array_ref
;
132 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
133 vect_type
= TREE_TYPE (TREE_TYPE (array
));
134 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
135 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
136 build_int_cst (size_type_node
, n
),
137 NULL_TREE
, NULL_TREE
);
139 new_stmt
= gimple_build_assign (vect
, array_ref
);
140 vect_name
= make_ssa_name (vect
, new_stmt
);
141 gimple_assign_set_lhs (new_stmt
, vect_name
);
142 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
152 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
153 tree array
, unsigned HOST_WIDE_INT n
)
158 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
159 build_int_cst (size_type_node
, n
),
160 NULL_TREE
, NULL_TREE
);
162 new_stmt
= gimple_build_assign (array_ref
, vect
);
163 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
171 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
173 tree mem_ref
, alias_ptr_type
;
175 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
176 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
182 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
184 /* Function vect_mark_relevant.
186 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): this extraction appears to be missing lines relative to
   upstream GCC (declarations, braces, early returns); the visible code is
   kept verbatim below — reconcile against the original before building.  */
189 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
190 enum vect_relevant relevant
, bool live_p
,
191 bool used_in_pattern
)
193 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Remember current flags so we can detect a no-op marking below.  */
194 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
195 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
198 if (dump_enabled_p ())
199 dump_printf_loc (MSG_NOTE
, vect_location
,
200 "mark relevant %d, live %d.\n", relevant
, live_p
);
202 /* If this stmt is an original stmt in a pattern, we might need to mark its
203 related pattern stmt instead of the original stmt. However, such stmts
204 may have their own uses that are not in any pattern, in such cases the
205 stmt itself should be marked. */
206 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
209 if (!used_in_pattern
)
211 imm_use_iterator imm_iter
;
215 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
216 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
218 if (is_gimple_assign (stmt
))
219 lhs
= gimple_assign_lhs (stmt
);
221 lhs
= gimple_call_lhs (stmt
);
223 /* This use is out of pattern use, if LHS has other uses that are
224 pattern uses, we should mark the stmt itself, and not the pattern
226 if (TREE_CODE (lhs
) == SSA_NAME
)
227 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
229 if (is_gimple_debug (USE_STMT (use_p
)))
231 use_stmt
= USE_STMT (use_p
);
233 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
236 if (vinfo_for_stmt (use_stmt
)
237 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
247 /* This is the last stmt in a sequence that was detected as a
248 pattern that can potentially be vectorized. Don't mark the stmt
249 as relevant/live because it's not going to be vectorized.
250 Instead mark the pattern-stmt that replaces it. */
252 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
254 if (dump_enabled_p ())
255 dump_printf_loc (MSG_NOTE
, vect_location
,
256 "last stmt in pattern. don't mark"
257 " relevant/live.\n");
/* Redirect all marking to the pattern statement's info.  */
258 stmt_info
= vinfo_for_stmt (pattern_stmt
);
259 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
260 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
261 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Merge the new liveness/relevance into STMT_INFO; relevance only ever
   increases (max of old and new).  */
266 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
267 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
268 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* If nothing changed the stmt was already marked; avoid re-queuing it.  */
270 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
271 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE
, vect_location
,
275 "already marked relevant/live.\n");
/* Newly marked: queue for use-propagation in the worklist pass.  */
279 worklist
->safe_push (stmt
);
283 /* Function vect_stmt_relevant_p.
285 Return true if STMT in loop that is represented by LOOP_VINFO is
286 "relevant for vectorization".
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
291 - control stmts in the loop (except for the exit condition).
293 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): lines appear to be dropped here relative to upstream GCC
   (e.g. the *live_p initialization and several declarations); the visible
   code is kept verbatim — reconcile before building.  */
296 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
297 enum vect_relevant
*relevant
, bool *live_p
)
299 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
301 imm_use_iterator imm_iter
;
305 *relevant
= vect_unused_in_scope
;
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt
)
310 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
311 != loop_exit_ctrl_vec_info_type
)
312 *relevant
= vect_used_in_scope
;
314 /* changing memory. */
315 if (gimple_code (stmt
) != GIMPLE_PHI
)
316 if (gimple_vdef (stmt
))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE
, vect_location
,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant
= vect_used_in_scope
;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
327 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
329 basic_block bb
= gimple_bb (USE_STMT (use_p
));
330 if (!flow_bb_inside_loop_p (loop
, bb
))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE
, vect_location
,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p
)))
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
342 gcc_assert (bb
== single_exit (loop
)->dest
);
/* Relevant iff live outside the loop or used inside it.  */
349 return (*live_p
|| *relevant
);
353 /* Function exist_non_indexing_operands_for_use_p
355 USE is one of the uses attached to STMT. Check if USE is
356 used in STMT for anything other than indexing an array. */
/* NOTE(review): the tail of this function (the final comparison of the
   copied operand against USE and the return statements) is missing from
   this extraction; code kept verbatim — reconcile with upstream.  */
359 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
362 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
364 /* USE corresponds to some operand in STMT. If there is no data
365 reference in STMT, then any operand that corresponds to USE
366 is not indexing an array. */
367 if (!STMT_VINFO_DATA_REF (stmt_info
))
370 /* STMT has a data_ref. FORNOW this means that its of one of
374 (This should have been verified in analyze_data_refs).
376 'var' in the second case corresponds to a def, not a use,
377 so USE cannot correspond to any operands that are not used
380 Therefore, all we need to check is if STMT falls into the
381 first case, and whether var corresponds to USE. */
383 if (!gimple_assign_copy_p (stmt
))
385 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
387 operand
= gimple_assign_rhs1 (stmt
);
388 if (TREE_CODE (operand
) != SSA_NAME
)
399 Function process_use.
402 - a USE in STMT in a loop represented by LOOP_VINFO
403 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
404 that defined USE. This is done by calling mark_relevant and passing it
405 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
406 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
410 Generally, LIVE_P and RELEVANT are used to define the liveness and
411 relevance info of the DEF_STMT of this USE:
412 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
413 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
415 - case 1: If USE is used only for address computations (e.g. array indexing),
416 which does not need to be directly vectorized, then the liveness/relevance
417 of the respective DEF_STMT is left unchanged.
418 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
419 skip DEF_STMT cause it had already been processed.
420 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
421 be modified accordingly.
423 Return true if everything is as expected. Return false otherwise. */
/* NOTE(review): switch scaffolding (switch heads, breaks, gcc_unreachable
   arms) and several returns are missing from this extraction; code kept
   verbatim — reconcile with upstream GCC before building.  */
426 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
427 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
430 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
431 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
432 stmt_vec_info dstmt_vinfo
;
433 basic_block bb
, def_bb
;
436 enum vect_def_type dt
;
438 /* case 1: we are only interested in uses that need to be vectorized. Uses
439 that are used for address computation are not considered relevant. */
440 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
443 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
445 if (dump_enabled_p ())
446 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
447 "not vectorized: unsupported use in stmt.\n");
451 if (!def_stmt
|| gimple_nop_p (def_stmt
))
454 def_bb
= gimple_bb (def_stmt
);
455 if (!flow_bb_inside_loop_p (loop
, def_bb
))
457 if (dump_enabled_p ())
458 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
462 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
463 DEF_STMT must have already been processed, because this should be the
464 only way that STMT, which is a reduction-phi, was put in the worklist,
465 as there should be no other uses for DEF_STMT in the loop. So we just
466 check that everything is as expected, and we are done. */
467 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
468 bb
= gimple_bb (stmt
);
469 if (gimple_code (stmt
) == GIMPLE_PHI
470 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
471 && gimple_code (def_stmt
) != GIMPLE_PHI
472 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
473 && bb
->loop_father
== def_bb
->loop_father
)
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_NOTE
, vect_location
,
477 "reduc-stmt defining reduc-phi in the same nest.\n");
478 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
479 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
480 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
481 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
482 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
486 /* case 3a: outer-loop stmt defining an inner-loop stmt:
487 outer-loop-header-bb:
/* The def is in an outer loop relative to STMT: translate RELEVANT into
   the inner-loop view before marking DEF_STMT.  */
493 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
495 if (dump_enabled_p ())
496 dump_printf_loc (MSG_NOTE
, vect_location
,
497 "outer-loop def-stmt defining inner-loop stmt.\n");
501 case vect_unused_in_scope
:
502 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
503 vect_used_in_scope
: vect_unused_in_scope
;
506 case vect_used_in_outer_by_reduction
:
507 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
508 relevant
= vect_used_by_reduction
;
511 case vect_used_in_outer
:
512 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
513 relevant
= vect_used_in_scope
;
516 case vect_used_in_scope
:
524 /* case 3b: inner-loop stmt defining an outer-loop stmt:
525 outer-loop-header-bb:
529 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
/* The def is in an inner loop relative to STMT: translate RELEVANT into
   the outer-loop view before marking DEF_STMT.  */
531 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
533 if (dump_enabled_p ())
534 dump_printf_loc (MSG_NOTE
, vect_location
,
535 "inner-loop def-stmt defining outer-loop stmt.\n");
539 case vect_unused_in_scope
:
540 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
541 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
542 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
545 case vect_used_by_reduction
:
546 relevant
= vect_used_in_outer_by_reduction
;
549 case vect_used_in_scope
:
550 relevant
= vect_used_in_outer
;
558 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
559 is_pattern_stmt_p (stmt_vinfo
));
564 /* Function vect_mark_stmts_to_be_vectorized.
566 Not all stmts in the loop need to be vectorized. For example:
575 Stmt 1 and 3 do not need to be vectorized, because loop control and
576 addressing of vectorized data-refs are handled differently.
578 This pass detects such stmts. */
/* NOTE(review): many lines are missing from this extraction (braces,
   returns, switch heads, loop bodies such as 'bb = bbs[i]'); code kept
   verbatim — reconcile with upstream GCC before building.  */
581 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
583 vec
<gimple
> worklist
;
584 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
585 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
586 unsigned int nbbs
= loop
->num_nodes
;
587 gimple_stmt_iterator si
;
590 stmt_vec_info stmt_vinfo
;
594 enum vect_relevant relevant
, tmp_relevant
;
595 enum vect_def_type def_type
;
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE
, vect_location
,
599 "=== vect_mark_stmts_to_be_vectorized ===\n");
601 worklist
.create (64);
603 /* 1. Init worklist. */
/* Seed the worklist with every phi and stmt that is relevant on its own
   (live outside the loop, has vdefs, or is a non-exit control stmt).  */
604 for (i
= 0; i
< nbbs
; i
++)
607 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
610 if (dump_enabled_p ())
612 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
613 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
614 dump_printf (MSG_NOTE
, "\n");
617 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
618 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
620 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
622 stmt
= gsi_stmt (si
);
623 if (dump_enabled_p ())
625 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
626 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
627 dump_printf (MSG_NOTE
, "\n");
630 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
631 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
635 /* 2. Process_worklist */
/* Propagate relevance/liveness from each queued stmt to the defs of its
   uses, until the worklist is drained.  */
636 while (worklist
.length () > 0)
641 stmt
= worklist
.pop ();
642 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
645 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
646 dump_printf (MSG_NOTE
, "\n");
649 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
650 (DEF_STMT) as relevant/irrelevant and live/dead according to the
651 liveness and relevance properties of STMT. */
652 stmt_vinfo
= vinfo_for_stmt (stmt
);
653 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
654 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
656 /* Generally, the liveness and relevance properties of STMT are
657 propagated as is to the DEF_STMTs of its USEs:
658 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
659 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
661 One exception is when STMT has been identified as defining a reduction
662 variable; in this case we set the liveness/relevance as follows:
664 relevant = vect_used_by_reduction
665 This is because we distinguish between two kinds of relevant stmts -
666 those that are used by a reduction computation, and those that are
667 (also) used by a regular computation. This allows us later on to
668 identify stmts that are used solely by a reduction, and therefore the
669 order of the results that they produce does not have to be kept. */
671 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
672 tmp_relevant
= relevant
;
675 case vect_reduction_def
:
676 switch (tmp_relevant
)
678 case vect_unused_in_scope
:
679 relevant
= vect_used_by_reduction
;
682 case vect_used_by_reduction
:
683 if (gimple_code (stmt
) == GIMPLE_PHI
)
688 if (dump_enabled_p ())
689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
690 "unsupported use of reduction.\n");
698 case vect_nested_cycle
:
699 if (tmp_relevant
!= vect_unused_in_scope
700 && tmp_relevant
!= vect_used_in_outer_by_reduction
701 && tmp_relevant
!= vect_used_in_outer
)
703 if (dump_enabled_p ())
704 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
705 "unsupported use of nested cycle.\n");
714 case vect_double_reduction_def
:
715 if (tmp_relevant
!= vect_unused_in_scope
716 && tmp_relevant
!= vect_used_by_reduction
)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
720 "unsupported use of double reduction.\n");
733 if (is_pattern_stmt_p (stmt_vinfo
))
735 /* Pattern statements are not inserted into the code, so
736 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
737 have to scan the RHS or function arguments instead. */
738 if (is_gimple_assign (stmt
))
740 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
741 tree op
= gimple_assign_rhs1 (stmt
);
744 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
746 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
747 live_p
, relevant
, &worklist
, false)
748 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
749 live_p
, relevant
, &worklist
, false))
756 for (; i
< gimple_num_ops (stmt
); i
++)
758 op
= gimple_op (stmt
, i
);
759 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
767 else if (is_gimple_call (stmt
))
769 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
771 tree arg
= gimple_call_arg (stmt
, i
);
772 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
/* Non-pattern stmts: walk the SSA use operands directly.  */
782 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
784 tree op
= USE_FROM_PTR (use_p
);
785 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
/* Gather loads have an offset operand that is not an SSA use operand,
   so process it explicitly (FORCE = true).  */
793 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
796 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
798 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
805 } /* while worklist */
812 /* Function vect_model_simple_cost.
814 Models cost for simple operations, i.e. those that only emit ncopies of a
815 single op. Right now, this does not account for multiple insns that could
816 be generated for the single vector op. We will handle that shortly. */
/* NOTE(review): return/brace lines are missing from this extraction; code
   kept verbatim — reconcile with upstream GCC before building.  */
819 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
820 enum vect_def_type
*dt
,
821 stmt_vector_for_cost
*prologue_cost_vec
,
822 stmt_vector_for_cost
*body_cost_vec
)
825 int inside_cost
= 0, prologue_cost
= 0;
827 /* The SLP costs were already calculated during SLP tree build. */
828 if (PURE_SLP_STMT (stmt_info
))
831 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constants/externals require a one-time vector setup in the prologue.  */
832 for (i
= 0; i
< 2; i
++)
833 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
834 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
835 stmt_info
, 0, vect_prologue
);
837 /* Pass the inside-of-loop statements to the target-specific cost model. */
838 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
839 stmt_info
, 0, vect_body
);
841 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE
, vect_location
,
843 "vect_model_simple_cost: inside_cost = %d, "
844 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
848 /* Model cost for type demotion and promotion operations. PWR is normally
849 zero for single-step promotions and demotions. It will be one if
850 two-step promotion/demotion is required, and so on. Each additional
851 step doubles the number of instructions required. */
/* NOTE(review): the ternary arms selecting TMP and the if/else around the
   target_cost_data assignments are missing from this extraction; code kept
   verbatim — reconcile with upstream GCC before building.  */
854 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
855 enum vect_def_type
*dt
, int pwr
)
858 int inside_cost
= 0, prologue_cost
= 0;
859 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
860 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
861 void *target_cost_data
;
863 /* The SLP costs were already calculated during SLP tree build. */
864 if (PURE_SLP_STMT (stmt_info
))
868 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
870 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
/* Each extra promotion/demotion step doubles the instruction count,
   hence the vect_pow2 scaling below.  */
872 for (i
= 0; i
< pwr
+ 1; i
++)
874 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
876 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
877 vec_promote_demote
, stmt_info
, 0,
881 /* FORNOW: Assuming maximum 2 args per stmts. */
882 for (i
= 0; i
< 2; i
++)
883 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
884 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
885 stmt_info
, 0, vect_prologue
);
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE
, vect_location
,
889 "vect_model_promotion_demotion_cost: inside_cost = %d, "
890 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
893 /* Function vect_cost_group_size
895 For grouped load or store, return the group_size only if it is the first
896 load or store of a group, else return 1. This ensures that group size is
897 only returned once per group. */
900 vect_cost_group_size (stmt_vec_info stmt_info
)
902 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
904 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
905 return GROUP_SIZE (stmt_info
);
911 /* Function vect_model_store_cost
913 Models cost for stores. In the case of grouped accesses, one access
914 has the overhead of the grouped access attributed to it. */
/* NOTE(review): the slp_node branch head, group_size initialization and
   early returns are missing from this extraction; code kept verbatim —
   reconcile with upstream GCC before building.  */
917 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
918 bool store_lanes_p
, enum vect_def_type dt
,
920 stmt_vector_for_cost
*prologue_cost_vec
,
921 stmt_vector_for_cost
*body_cost_vec
)
924 unsigned int inside_cost
= 0, prologue_cost
= 0;
925 struct data_reference
*first_dr
;
928 /* The SLP costs were already calculated during SLP tree build. */
929 if (PURE_SLP_STMT (stmt_info
))
/* A constant/invariant stored value must be broadcast once, in the
   prologue.  */
932 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
933 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
934 stmt_info
, 0, vect_prologue
);
936 /* Grouped access? */
937 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
941 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
946 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
947 group_size
= vect_cost_group_size (stmt_info
);
950 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
952 /* Not a grouped access. */
956 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
959 /* We assume that the cost of a single store-lanes instruction is
960 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
961 access is instead being provided by a permute-and-store operation,
962 include the cost of the permutes. */
963 if (!store_lanes_p
&& group_size
> 1)
965 /* Uses a high and low interleave operation for each needed permute. */
967 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
968 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
969 stmt_info
, 0, vect_body
);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE
, vect_location
,
973 "vect_model_store_cost: strided group_size = %d .\n",
977 /* Costs of the stores. */
978 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
980 if (dump_enabled_p ())
981 dump_printf_loc (MSG_NOTE
, vect_location
,
982 "vect_model_store_cost: inside_cost = %d, "
983 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
987 /* Calculate cost of DR's memory access. */
/* NOTE(review): the dr_aligned case label, break statements and the
   default/gcc_unreachable arm are missing from this extraction; code kept
   verbatim — reconcile with upstream GCC before building.  */
989 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
990 unsigned int *inside_cost
,
991 stmt_vector_for_cost
*body_cost_vec
)
993 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
994 gimple stmt
= DR_STMT (dr
);
995 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
997 switch (alignment_support_scheme
)
1001 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1002 vector_store
, stmt_info
, 0,
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE
, vect_location
,
1007 "vect_model_store_cost: aligned.\n");
1011 case dr_unaligned_supported
:
1013 /* Here, we assign an additional cost for the unaligned store. */
1014 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1015 unaligned_store
, stmt_info
,
1016 DR_MISALIGNMENT (dr
), vect_body
);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE
, vect_location
,
1019 "vect_model_store_cost: unaligned supported by "
1024 case dr_unaligned_unsupported
:
/* Unsupported alignment: make this access prohibitively expensive so the
   vectorizer rejects it.  */
1026 *inside_cost
= VECT_MAX_COST
;
1028 if (dump_enabled_p ())
1029 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1030 "vect_model_store_cost: unsupported access.\n");
1040 /* Function vect_model_load_cost
1042 Models cost for loads. In the case of grouped accesses, the last access
1043 has the overhead of the grouped access attributed to it. Since unaligned
1044 accesses are supported for loads, we also account for the costs of the
1045 access scheme chosen. */
/* NOTE(review): declarations (group_size, first_stmt), the not-grouped
   else-branch body and early returns are missing from this extraction;
   code kept verbatim — reconcile with upstream GCC before building.  */
1048 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1049 bool load_lanes_p
, slp_tree slp_node
,
1050 stmt_vector_for_cost
*prologue_cost_vec
,
1051 stmt_vector_for_cost
*body_cost_vec
)
1055 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1056 unsigned int inside_cost
= 0, prologue_cost
= 0;
1058 /* The SLP costs were already calculated during SLP tree build. */
1059 if (PURE_SLP_STMT (stmt_info
))
1062 /* Grouped accesses? */
1063 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1064 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1066 group_size
= vect_cost_group_size (stmt_info
);
1067 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1069 /* Not a grouped access. */
1076 /* We assume that the cost of a single load-lanes instruction is
1077 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1078 access is instead being provided by a load-and-permute operation,
1079 include the cost of the permutes. */
1080 if (!load_lanes_p
&& group_size
> 1)
1082 /* Uses an even and odd extract operations for each needed permute. */
1083 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1084 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1085 stmt_info
, 0, vect_body
);
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE
, vect_location
,
1089 "vect_model_load_cost: strided group_size = %d .\n",
1093 /* The loads themselves. */
1094 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1096 /* N scalar loads plus gathering them into a vector. */
1097 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1098 inside_cost
+= record_stmt_cost (body_cost_vec
,
1099 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1100 scalar_load
, stmt_info
, 0, vect_body
);
1101 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1102 stmt_info
, 0, vect_body
);
1105 vect_get_load_cost (first_dr
, ncopies
,
1106 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1107 || group_size
> 1 || slp_node
),
1108 &inside_cost
, &prologue_cost
,
1109 prologue_cost_vec
, body_cost_vec
, true);
1111 if (dump_enabled_p ())
1112 dump_printf_loc (MSG_NOTE
, vect_location
,
1113 "vect_model_load_cost: inside_cost = %d, "
1114 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1118 /* Calculate cost of DR's memory access. */
/* NOTE(review): the dr_aligned case label, break statements, cost-model
   location arguments of some calls and the default arm are missing from
   this extraction; code kept verbatim — reconcile with upstream GCC.  */
1120 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1121 bool add_realign_cost
, unsigned int *inside_cost
,
1122 unsigned int *prologue_cost
,
1123 stmt_vector_for_cost
*prologue_cost_vec
,
1124 stmt_vector_for_cost
*body_cost_vec
,
1125 bool record_prologue_costs
)
1127 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1128 gimple stmt
= DR_STMT (dr
);
1129 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1131 switch (alignment_support_scheme
)
1135 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1136 stmt_info
, 0, vect_body
);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE
, vect_location
,
1140 "vect_model_load_cost: aligned.\n");
1144 case dr_unaligned_supported
:
1146 /* Here, we assign an additional cost for the unaligned load. */
1147 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1148 unaligned_load
, stmt_info
,
1149 DR_MISALIGNMENT (dr
), vect_body
);
1151 if (dump_enabled_p ())
1152 dump_printf_loc (MSG_NOTE
, vect_location
,
1153 "vect_model_load_cost: unaligned supported by "
1158 case dr_explicit_realign
:
/* Explicit realignment: two loads plus a permute per copy.  */
1160 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1161 vector_load
, stmt_info
, 0, vect_body
);
1162 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1163 vec_perm
, stmt_info
, 0, vect_body
);
1165 /* FIXME: If the misalignment remains fixed across the iterations of
1166 the containing loop, the following cost should be added to the
1168 if (targetm
.vectorize
.builtin_mask_for_load
)
1169 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1170 stmt_info
, 0, vect_body
);
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE
, vect_location
,
1174 "vect_model_load_cost: explicit realign\n");
1178 case dr_explicit_realign_optimized
:
1180 if (dump_enabled_p ())
1181 dump_printf_loc (MSG_NOTE
, vect_location
,
1182 "vect_model_load_cost: unaligned software "
1185 /* Unaligned software pipeline has a load of an address, an initial
1186 load, and possibly a mask operation to "prime" the loop. However,
1187 if this is an access in a group of loads, which provide grouped
1188 access, then the above cost should only be considered for one
1189 access in the group. Inside the loop, there is a load op
1190 and a realignment op. */
1192 if (add_realign_cost
&& record_prologue_costs
)
1194 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1195 vector_stmt
, stmt_info
,
1197 if (targetm
.vectorize
.builtin_mask_for_load
)
1198 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1199 vector_stmt
, stmt_info
,
1203 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1204 stmt_info
, 0, vect_body
);
1205 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1206 stmt_info
, 0, vect_body
);
1208 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_NOTE
, vect_location
,
1210 "vect_model_load_cost: explicit realign optimized"
1216 case dr_unaligned_unsupported
:
/* Unsupported alignment: make this access prohibitively expensive so the
   vectorizer rejects it.  */
1218 *inside_cost
= VECT_MAX_COST
;
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1222 "vect_model_load_cost: unsupported access.\n");
1231 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1232 the loop preheader for the vectorized stmt STMT. */
1235 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1238 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1241 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1242 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1246 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1250 if (nested_in_vect_loop_p (loop
, stmt
))
1253 pe
= loop_preheader_edge (loop
);
1254 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1255 gcc_assert (!new_bb
);
1259 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1261 gimple_stmt_iterator gsi_bb_start
;
1263 gcc_assert (bb_vinfo
);
1264 bb
= BB_VINFO_BB (bb_vinfo
);
1265 gsi_bb_start
= gsi_after_labels (bb
);
1266 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1270 if (dump_enabled_p ())
1272 dump_printf_loc (MSG_NOTE
, vect_location
,
1273 "created new init_stmt: ");
1274 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1275 dump_printf (MSG_NOTE
, "\n");
1279 /* Function vect_init_vector.
1281 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1282 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1283 vector type a vector with all elements equal to VAL is created first.
1284 Place the initialization at BSI if it is not NULL. Otherwise, place the
1285 initialization at the loop preheader.
1286 Return the DEF of INIT_STMT.
1287 It will be used in the vectorization of STMT. */
1290 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1297 if (TREE_CODE (type
) == VECTOR_TYPE
1298 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1300 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1302 if (CONSTANT_CLASS_P (val
))
1303 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1306 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1307 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1310 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1314 val
= build_vector_from_val (type
, val
);
1317 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1318 init_stmt
= gimple_build_assign (new_var
, val
);
1319 new_temp
= make_ssa_name (new_var
, init_stmt
);
1320 gimple_assign_set_lhs (init_stmt
, new_temp
);
1321 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1322 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1327 /* Function vect_get_vec_def_for_operand.
1329 OP is an operand in STMT. This function returns a (vector) def that will be
1330 used in the vectorized stmt for STMT.
1332 In the case that OP is an SSA_NAME which is defined in the loop, then
1333 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1335 In case OP is an invariant or constant, a new stmt that creates a vector def
1336 needs to be introduced. */
1339 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1344 stmt_vec_info def_stmt_info
= NULL
;
1345 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1346 unsigned int nunits
;
1347 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1349 enum vect_def_type dt
;
1353 if (dump_enabled_p ())
1355 dump_printf_loc (MSG_NOTE
, vect_location
,
1356 "vect_get_vec_def_for_operand: ");
1357 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1358 dump_printf (MSG_NOTE
, "\n");
1361 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1362 &def_stmt
, &def
, &dt
);
1363 gcc_assert (is_simple_use
);
1364 if (dump_enabled_p ())
1366 int loc_printed
= 0;
1369 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1371 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1372 dump_printf (MSG_NOTE
, "\n");
1377 dump_printf (MSG_NOTE
, " def_stmt = ");
1379 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1380 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1381 dump_printf (MSG_NOTE
, "\n");
1387 /* Case 1: operand is a constant. */
1388 case vect_constant_def
:
1390 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1391 gcc_assert (vector_type
);
1392 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1397 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1398 if (dump_enabled_p ())
1399 dump_printf_loc (MSG_NOTE
, vect_location
,
1400 "Create vector_cst. nunits = %d\n", nunits
);
1402 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1405 /* Case 2: operand is defined outside the loop - loop invariant. */
1406 case vect_external_def
:
1408 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1409 gcc_assert (vector_type
);
1414 /* Create 'vec_inv = {inv,inv,..,inv}' */
1415 if (dump_enabled_p ())
1416 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1418 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1421 /* Case 3: operand is defined inside the loop. */
1422 case vect_internal_def
:
1425 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1427 /* Get the def from the vectorized stmt. */
1428 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1430 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1431 /* Get vectorized pattern statement. */
1433 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1434 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1435 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1436 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1437 gcc_assert (vec_stmt
);
1438 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1439 vec_oprnd
= PHI_RESULT (vec_stmt
);
1440 else if (is_gimple_call (vec_stmt
))
1441 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1443 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1447 /* Case 4: operand is defined by a loop header phi - reduction */
1448 case vect_reduction_def
:
1449 case vect_double_reduction_def
:
1450 case vect_nested_cycle
:
1454 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1455 loop
= (gimple_bb (def_stmt
))->loop_father
;
1457 /* Get the def before the loop */
1458 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1459 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1462 /* Case 5: operand is defined by loop-header phi - induction. */
1463 case vect_induction_def
:
1465 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1467 /* Get the def from the vectorized stmt. */
1468 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1469 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1470 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1471 vec_oprnd
= PHI_RESULT (vec_stmt
);
1473 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1483 /* Function vect_get_vec_def_for_stmt_copy
1485 Return a vector-def for an operand. This function is used when the
1486 vectorized stmt to be created (by the caller to this function) is a "copy"
1487 created in case the vectorized result cannot fit in one vector, and several
1488 copies of the vector-stmt are required. In this case the vector-def is
1489 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1490 of the stmt that defines VEC_OPRND.
1491 DT is the type of the vector def VEC_OPRND.
1494 In case the vectorization factor (VF) is bigger than the number
1495 of elements that can fit in a vectype (nunits), we have to generate
1496 more than one vector stmt to vectorize the scalar stmt. This situation
1497 arises when there are multiple data-types operated upon in the loop; the
1498 smallest data-type determines the VF, and as a result, when vectorizing
1499 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1500 vector stmt (each computing a vector of 'nunits' results, and together
1501 computing 'VF' results in each iteration). This function is called when
1502 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1503 which VF=16 and nunits=4, so the number of copies required is 4):
1505 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1507 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1508 VS1.1: vx.1 = memref1 VS1.2
1509 VS1.2: vx.2 = memref2 VS1.3
1510 VS1.3: vx.3 = memref3
1512 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1513 VSnew.1: vz1 = vx.1 + ... VSnew.2
1514 VSnew.2: vz2 = vx.2 + ... VSnew.3
1515 VSnew.3: vz3 = vx.3 + ...
1517 The vectorization of S1 is explained in vectorizable_load.
1518 The vectorization of S2:
1519 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1520 the function 'vect_get_vec_def_for_operand' is called to
1521 get the relevant vector-def for each operand of S2. For operand x it
1522 returns the vector-def 'vx.0'.
1524 To create the remaining copies of the vector-stmt (VSnew.j), this
1525 function is called to get the relevant vector-def for each operand. It is
1526 obtained from the respective VS1.j stmt, which is recorded in the
1527 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1529 For example, to obtain the vector-def 'vx.1' in order to create the
1530 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1531 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1532 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1533 and return its def ('vx.1').
1534 Overall, to create the above sequence this function will be called 3 times:
1535 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1536 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1537 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1540 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1542 gimple vec_stmt_for_operand
;
1543 stmt_vec_info def_stmt_info
;
1545 /* Do nothing; can reuse same def. */
1546 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1549 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1550 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1551 gcc_assert (def_stmt_info
);
1552 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1553 gcc_assert (vec_stmt_for_operand
);
1554 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1555 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1556 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1558 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1563 /* Get vectorized definitions for the operands to create a copy of an original
1564 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1567 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1568 vec
<tree
> *vec_oprnds0
,
1569 vec
<tree
> *vec_oprnds1
)
1571 tree vec_oprnd
= vec_oprnds0
->pop ();
1573 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1574 vec_oprnds0
->quick_push (vec_oprnd
);
1576 if (vec_oprnds1
&& vec_oprnds1
->length ())
1578 vec_oprnd
= vec_oprnds1
->pop ();
1579 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1580 vec_oprnds1
->quick_push (vec_oprnd
);
1585 /* Get vectorized definitions for OP0 and OP1.
1586 REDUC_INDEX is the index of reduction operand in case of reduction,
1587 and -1 otherwise. */
1590 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1591 vec
<tree
> *vec_oprnds0
,
1592 vec
<tree
> *vec_oprnds1
,
1593 slp_tree slp_node
, int reduc_index
)
1597 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1600 vec
<vec
<tree
> > vec_defs
;
1601 vec_defs
.create (nops
);
1603 ops
.quick_push (op0
);
1605 ops
.quick_push (op1
);
1607 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1609 *vec_oprnds0
= vec_defs
[0];
1611 *vec_oprnds1
= vec_defs
[1];
1614 vec_defs
.release ();
1620 vec_oprnds0
->create (1);
1621 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1622 vec_oprnds0
->quick_push (vec_oprnd
);
1626 vec_oprnds1
->create (1);
1627 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1628 vec_oprnds1
->quick_push (vec_oprnd
);
1634 /* Function vect_finish_stmt_generation.
1636 Insert a new stmt. */
1639 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1640 gimple_stmt_iterator
*gsi
)
1642 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1643 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1644 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1646 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1648 if (!gsi_end_p (*gsi
)
1649 && gimple_has_mem_ops (vec_stmt
))
1651 gimple at_stmt
= gsi_stmt (*gsi
);
1652 tree vuse
= gimple_vuse (at_stmt
);
1653 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1655 tree vdef
= gimple_vdef (at_stmt
);
1656 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1657 /* If we have an SSA vuse and insert a store, update virtual
1658 SSA form to avoid triggering the renamer. Do so only
1659 if we can easily see all uses - which is what almost always
1660 happens with the way vectorized stmts are inserted. */
1661 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1662 && ((is_gimple_assign (vec_stmt
)
1663 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1664 || (is_gimple_call (vec_stmt
)
1665 && !(gimple_call_flags (vec_stmt
)
1666 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1668 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1669 gimple_set_vdef (vec_stmt
, new_vdef
);
1670 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1674 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1676 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1679 if (dump_enabled_p ())
1681 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1682 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1683 dump_printf (MSG_NOTE
, "\n");
1686 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1689 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1690 a function declaration if the target has a vectorized version
1691 of the function, or NULL_TREE if the function cannot be vectorized. */
1694 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1696 tree fndecl
= gimple_call_fndecl (call
);
1698 /* We only handle functions that do not read or clobber memory -- i.e.
1699 const or novops ones. */
1700 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1704 || TREE_CODE (fndecl
) != FUNCTION_DECL
1705 || !DECL_BUILT_IN (fndecl
))
1708 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1712 /* Function vectorizable_call.
1714 Check if STMT performs a function call that can be vectorized.
1715 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1716 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1717 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1720 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1726 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1727 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1728 tree vectype_out
, vectype_in
;
1731 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1732 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1733 tree fndecl
, new_temp
, def
, rhs_type
;
1735 enum vect_def_type dt
[3]
1736 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1737 gimple new_stmt
= NULL
;
1739 vec
<tree
> vargs
= vNULL
;
1740 enum { NARROW
, NONE
, WIDEN
} modifier
;
1744 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1747 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1750 /* Is STMT a vectorizable call? */
1751 if (!is_gimple_call (stmt
))
1754 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1757 if (stmt_can_throw_internal (stmt
))
1760 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1762 /* Process function arguments. */
1763 rhs_type
= NULL_TREE
;
1764 vectype_in
= NULL_TREE
;
1765 nargs
= gimple_call_num_args (stmt
);
1767 /* Bail out if the function has more than three arguments, we do not have
1768 interesting builtin functions to vectorize with more than two arguments
1769 except for fma. No arguments is also not good. */
1770 if (nargs
== 0 || nargs
> 3)
1773 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1774 if (gimple_call_internal_p (stmt
)
1775 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1778 rhs_type
= unsigned_type_node
;
1781 for (i
= 0; i
< nargs
; i
++)
1785 op
= gimple_call_arg (stmt
, i
);
1787 /* We can only handle calls with arguments of the same type. */
1789 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1791 if (dump_enabled_p ())
1792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1793 "argument types differ.\n");
1797 rhs_type
= TREE_TYPE (op
);
1799 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1800 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1802 if (dump_enabled_p ())
1803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1804 "use not simple.\n");
1809 vectype_in
= opvectype
;
1811 && opvectype
!= vectype_in
)
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1815 "argument vector types differ.\n");
1819 /* If all arguments are external or constant defs use a vector type with
1820 the same size as the output vector type. */
1822 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1824 gcc_assert (vectype_in
);
1827 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1830 "no vectype for scalar type ");
1831 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
1832 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1839 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1840 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1841 if (nunits_in
== nunits_out
/ 2)
1843 else if (nunits_out
== nunits_in
)
1845 else if (nunits_out
== nunits_in
/ 2)
1850 /* For now, we only vectorize functions if a target specific builtin
1851 is available. TODO -- in some cases, it might be profitable to
1852 insert the calls for pieces of the vector, in order to be able
1853 to vectorize other operations in the loop. */
1854 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1855 if (fndecl
== NULL_TREE
)
1857 if (gimple_call_internal_p (stmt
)
1858 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
1861 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1862 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
1863 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1864 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
1866 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1867 { 0, 1, 2, ... vf - 1 } vector. */
1868 gcc_assert (nargs
== 0);
1872 if (dump_enabled_p ())
1873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1874 "function is not vectorizable.\n");
1879 gcc_assert (!gimple_vuse (stmt
));
1881 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1883 else if (modifier
== NARROW
)
1884 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1886 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1888 /* Sanity check: make sure that at least one copy of the vectorized stmt
1889 needs to be generated. */
1890 gcc_assert (ncopies
>= 1);
1892 if (!vec_stmt
) /* transformation not required. */
1894 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
1898 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
1908 scalar_dest
= gimple_call_lhs (stmt
);
1909 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1911 prev_stmt_info
= NULL
;
1915 for (j
= 0; j
< ncopies
; ++j
)
1917 /* Build argument list for the vectorized call. */
1919 vargs
.create (nargs
);
1925 vec
<vec
<tree
> > vec_defs
;
1926 vec_defs
.create (nargs
);
1927 vec
<tree
> vec_oprnds0
;
1929 for (i
= 0; i
< nargs
; i
++)
1930 vargs
.quick_push (gimple_call_arg (stmt
, i
));
1931 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1932 vec_oprnds0
= vec_defs
[0];
1934 /* Arguments are ready. Create the new vector stmt. */
1935 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
1938 for (k
= 0; k
< nargs
; k
++)
1940 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
1941 vargs
[k
] = vec_oprndsk
[i
];
1943 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1944 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1945 gimple_call_set_lhs (new_stmt
, new_temp
);
1946 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1947 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
1950 for (i
= 0; i
< nargs
; i
++)
1952 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
1953 vec_oprndsi
.release ();
1955 vec_defs
.release ();
1959 for (i
= 0; i
< nargs
; i
++)
1961 op
= gimple_call_arg (stmt
, i
);
1964 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1967 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1969 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1972 vargs
.quick_push (vec_oprnd0
);
1975 if (gimple_call_internal_p (stmt
)
1976 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1978 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
1980 for (k
= 0; k
< nunits_out
; ++k
)
1981 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
1982 tree cst
= build_vector (vectype_out
, v
);
1984 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
1985 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
1986 new_temp
= make_ssa_name (new_var
, init_stmt
);
1987 gimple_assign_set_lhs (init_stmt
, new_temp
);
1988 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
1989 new_temp
= make_ssa_name (vec_dest
, NULL
);
1990 new_stmt
= gimple_build_assign (new_temp
,
1991 gimple_assign_lhs (init_stmt
));
1995 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1996 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1997 gimple_call_set_lhs (new_stmt
, new_temp
);
1999 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2002 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2004 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2006 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2012 for (j
= 0; j
< ncopies
; ++j
)
2014 /* Build argument list for the vectorized call. */
2016 vargs
.create (nargs
* 2);
2022 vec
<vec
<tree
> > vec_defs
;
2023 vec_defs
.create (nargs
);
2024 vec
<tree
> vec_oprnds0
;
2026 for (i
= 0; i
< nargs
; i
++)
2027 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2028 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2029 vec_oprnds0
= vec_defs
[0];
2031 /* Arguments are ready. Create the new vector stmt. */
2032 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2036 for (k
= 0; k
< nargs
; k
++)
2038 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2039 vargs
.quick_push (vec_oprndsk
[i
]);
2040 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2042 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2043 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2044 gimple_call_set_lhs (new_stmt
, new_temp
);
2045 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2046 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2049 for (i
= 0; i
< nargs
; i
++)
2051 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2052 vec_oprndsi
.release ();
2054 vec_defs
.release ();
2058 for (i
= 0; i
< nargs
; i
++)
2060 op
= gimple_call_arg (stmt
, i
);
2064 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2066 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2070 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2072 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2074 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2077 vargs
.quick_push (vec_oprnd0
);
2078 vargs
.quick_push (vec_oprnd1
);
2081 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2083 gimple_call_set_lhs (new_stmt
, new_temp
);
2084 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2087 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2091 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2094 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2099 /* No current target implements this case. */
2105 /* Update the exception handling table with the vector stmt if necessary. */
2106 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2107 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
2109 /* The call in STMT might prevent it from being removed in dce.
2110 We however cannot remove it here, due to the way the ssa name
2111 it defines is mapped to the new definition. So just replace
2112 rhs of the statement with something harmless. */
2117 type
= TREE_TYPE (scalar_dest
);
2118 if (is_pattern_stmt_p (stmt_info
))
2119 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2121 lhs
= gimple_call_lhs (stmt
);
2122 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2123 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2124 set_vinfo_for_stmt (stmt
, NULL
);
2125 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2126 gsi_replace (gsi
, new_stmt
, false);
2127 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2133 /* Function vect_gen_widened_results_half
2135 Create a vector stmt whose code, type, number of arguments, and result
2136 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2137 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2138 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2139 needs to be created (DECL is a function-decl of a target-builtin).
2140 STMT is the original scalar stmt that we are vectorizing. */
2143 vect_gen_widened_results_half (enum tree_code code
,
2145 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2146 tree vec_dest
, gimple_stmt_iterator
*gsi
,
2152 /* Generate half of the widened result: */
2153 if (code
== CALL_EXPR
)
2155 /* Target specific support */
2156 if (op_type
== binary_op
)
2157 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2159 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2160 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2161 gimple_call_set_lhs (new_stmt
, new_temp
);
2165 /* Generic support */
2166 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2167 if (op_type
!= binary_op
)
2169 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2171 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2172 gimple_assign_set_lhs (new_stmt
, new_temp
);
2174 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2180 /* Get vectorized definitions for loop-based vectorization. For the first
2181 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2182 scalar operand), and for the rest we get a copy with
2183 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2184 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2185 The vectors are collected into VEC_OPRNDS. */
2188 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2189 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
2193 /* Get first vector operand. */
2194 /* All the vector operands except the very first one (that is scalar oprnd)
2196 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2197 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2199 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2201 vec_oprnds
->quick_push (vec_oprnd
);
2203 /* Get second vector operand. */
2204 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2205 vec_oprnds
->quick_push (vec_oprnd
);
2209 /* For conversion in multiple steps, continue to get operands
2212 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2216 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2217 For multi-step conversions store the resulting vectors and call the function
2221 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
2222 int multi_step_cvt
, gimple stmt
,
2224 gimple_stmt_iterator
*gsi
,
2225 slp_tree slp_node
, enum tree_code code
,
2226 stmt_vec_info
*prev_stmt_info
)
2229 tree vop0
, vop1
, new_tmp
, vec_dest
;
2231 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2233 vec_dest
= vec_dsts
.pop ();
2235 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
2237 /* Create demotion operation. */
2238 vop0
= (*vec_oprnds
)[i
];
2239 vop1
= (*vec_oprnds
)[i
+ 1];
2240 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2241 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2242 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2243 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2246 /* Store the resulting vector for next recursive call. */
2247 (*vec_oprnds
)[i
/2] = new_tmp
;
2250 /* This is the last step of the conversion sequence. Store the
2251 vectors in SLP_NODE or in vector info of the scalar statement
2252 (or in STMT_VINFO_RELATED_STMT chain). */
2254 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2257 if (!*prev_stmt_info
)
2258 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2260 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2262 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2267 /* For multi-step demotion operations we first generate demotion operations
2268 from the source type to the intermediate types, and then combine the
2269 results (stored in VEC_OPRNDS) in demotion operation to the destination
2273 /* At each level of recursion we have half of the operands we had at the
2275 vec_oprnds
->truncate ((i
+1)/2);
2276 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2277 stmt
, vec_dsts
, gsi
, slp_node
,
2278 VEC_PACK_TRUNC_EXPR
,
2282 vec_dsts
.quick_push (vec_dest
);
2286 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2287 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2288 the resulting vectors and call the function recursively. */
2291 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
2292 vec
<tree
> *vec_oprnds1
,
2293 gimple stmt
, tree vec_dest
,
2294 gimple_stmt_iterator
*gsi
,
2295 enum tree_code code1
,
2296 enum tree_code code2
, tree decl1
,
2297 tree decl2
, int op_type
)
2300 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2301 gimple new_stmt1
, new_stmt2
;
2302 vec
<tree
> vec_tmp
= vNULL
;
2304 vec_tmp
.create (vec_oprnds0
->length () * 2);
2305 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
2307 if (op_type
== binary_op
)
2308 vop1
= (*vec_oprnds1
)[i
];
2312 /* Generate the two halves of promotion operation. */
2313 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2314 op_type
, vec_dest
, gsi
, stmt
);
2315 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2316 op_type
, vec_dest
, gsi
, stmt
);
2317 if (is_gimple_call (new_stmt1
))
2319 new_tmp1
= gimple_call_lhs (new_stmt1
);
2320 new_tmp2
= gimple_call_lhs (new_stmt2
);
2324 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2325 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2328 /* Store the results for the next step. */
2329 vec_tmp
.quick_push (new_tmp1
);
2330 vec_tmp
.quick_push (new_tmp2
);
2333 vec_oprnds0
->release ();
2334 *vec_oprnds0
= vec_tmp
;
2338 /* Check if STMT performs a conversion operation, that can be vectorized.
2339 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2340 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2341 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2344 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2345 gimple
*vec_stmt
, slp_tree slp_node
)
2349 tree op0
, op1
= NULL_TREE
;
2350 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2351 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2352 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2353 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2354 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2355 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2359 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2360 gimple new_stmt
= NULL
;
2361 stmt_vec_info prev_stmt_info
;
2364 tree vectype_out
, vectype_in
;
2366 tree lhs_type
, rhs_type
;
2367 enum { NARROW
, NONE
, WIDEN
} modifier
;
2368 vec
<tree
> vec_oprnds0
= vNULL
;
2369 vec
<tree
> vec_oprnds1
= vNULL
;
2371 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2372 int multi_step_cvt
= 0;
2373 vec
<tree
> vec_dsts
= vNULL
;
2374 vec
<tree
> interm_types
= vNULL
;
2375 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2377 enum machine_mode rhs_mode
;
2378 unsigned short fltsz
;
2380 /* Is STMT a vectorizable conversion? */
2382 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2385 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2388 if (!is_gimple_assign (stmt
))
2391 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2394 code
= gimple_assign_rhs_code (stmt
);
2395 if (!CONVERT_EXPR_CODE_P (code
)
2396 && code
!= FIX_TRUNC_EXPR
2397 && code
!= FLOAT_EXPR
2398 && code
!= WIDEN_MULT_EXPR
2399 && code
!= WIDEN_LSHIFT_EXPR
)
2402 op_type
= TREE_CODE_LENGTH (code
);
2404 /* Check types of lhs and rhs. */
2405 scalar_dest
= gimple_assign_lhs (stmt
);
2406 lhs_type
= TREE_TYPE (scalar_dest
);
2407 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2409 op0
= gimple_assign_rhs1 (stmt
);
2410 rhs_type
= TREE_TYPE (op0
);
2412 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2413 && !((INTEGRAL_TYPE_P (lhs_type
)
2414 && INTEGRAL_TYPE_P (rhs_type
))
2415 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2416 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2419 if ((INTEGRAL_TYPE_P (lhs_type
)
2420 && (TYPE_PRECISION (lhs_type
)
2421 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2422 || (INTEGRAL_TYPE_P (rhs_type
)
2423 && (TYPE_PRECISION (rhs_type
)
2424 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2426 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2428 "type conversion to/from bit-precision unsupported."
2433 /* Check the operands of the operation. */
2434 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2435 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2437 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2439 "use not simple.\n");
2442 if (op_type
== binary_op
)
2446 op1
= gimple_assign_rhs2 (stmt
);
2447 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2448 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2450 if (CONSTANT_CLASS_P (op0
))
2451 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2452 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2454 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2459 if (dump_enabled_p ())
2460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2461 "use not simple.\n");
2466 /* If op0 is an external or constant defs use a vector type of
2467 the same size as the output vector type. */
2469 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2471 gcc_assert (vectype_in
);
2474 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2477 "no vectype for scalar type ");
2478 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2479 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2485 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2486 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2487 if (nunits_in
< nunits_out
)
2489 else if (nunits_out
== nunits_in
)
2494 /* Multiple types in SLP are handled by creating the appropriate number of
2495 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2497 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2499 else if (modifier
== NARROW
)
2500 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2502 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2504 /* Sanity check: make sure that at least one copy of the vectorized stmt
2505 needs to be generated. */
2506 gcc_assert (ncopies
>= 1);
2508 /* Supportable by target? */
2512 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2514 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2519 if (dump_enabled_p ())
2520 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2521 "conversion not supported by target.\n");
2525 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2526 &code1
, &code2
, &multi_step_cvt
,
2529 /* Binary widening operation can only be supported directly by the
2531 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2535 if (code
!= FLOAT_EXPR
2536 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2537 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2540 rhs_mode
= TYPE_MODE (rhs_type
);
2541 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2542 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2543 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2544 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2547 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2548 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2549 if (cvt_type
== NULL_TREE
)
2552 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2554 if (!supportable_convert_operation (code
, vectype_out
,
2555 cvt_type
, &decl1
, &codecvt1
))
2558 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2559 cvt_type
, &codecvt1
,
2560 &codecvt2
, &multi_step_cvt
,
2564 gcc_assert (multi_step_cvt
== 0);
2566 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2567 vectype_in
, &code1
, &code2
,
2568 &multi_step_cvt
, &interm_types
))
2572 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2575 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2576 codecvt2
= ERROR_MARK
;
2580 interm_types
.safe_push (cvt_type
);
2581 cvt_type
= NULL_TREE
;
2586 gcc_assert (op_type
== unary_op
);
2587 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2588 &code1
, &multi_step_cvt
,
2592 if (code
!= FIX_TRUNC_EXPR
2593 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2594 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2597 rhs_mode
= TYPE_MODE (rhs_type
);
2599 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2600 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2601 if (cvt_type
== NULL_TREE
)
2603 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2606 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2607 &code1
, &multi_step_cvt
,
2616 if (!vec_stmt
) /* transformation not required. */
2618 if (dump_enabled_p ())
2619 dump_printf_loc (MSG_NOTE
, vect_location
,
2620 "=== vectorizable_conversion ===\n");
2621 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2623 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2624 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2626 else if (modifier
== NARROW
)
2628 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2629 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2633 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2634 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2636 interm_types
.release ();
2641 if (dump_enabled_p ())
2642 dump_printf_loc (MSG_NOTE
, vect_location
,
2643 "transform conversion. ncopies = %d.\n", ncopies
);
2645 if (op_type
== binary_op
)
2647 if (CONSTANT_CLASS_P (op0
))
2648 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2649 else if (CONSTANT_CLASS_P (op1
))
2650 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2653 /* In case of multi-step conversion, we first generate conversion operations
2654 to the intermediate types, and then from that types to the final one.
2655 We create vector destinations for the intermediate type (TYPES) received
2656 from supportable_*_operation, and store them in the correct order
2657 for future use in vect_create_vectorized_*_stmts (). */
2658 vec_dsts
.create (multi_step_cvt
+ 1);
2659 vec_dest
= vect_create_destination_var (scalar_dest
,
2660 (cvt_type
&& modifier
== WIDEN
)
2661 ? cvt_type
: vectype_out
);
2662 vec_dsts
.quick_push (vec_dest
);
2666 for (i
= interm_types
.length () - 1;
2667 interm_types
.iterate (i
, &intermediate_type
); i
--)
2669 vec_dest
= vect_create_destination_var (scalar_dest
,
2671 vec_dsts
.quick_push (vec_dest
);
2676 vec_dest
= vect_create_destination_var (scalar_dest
,
2678 ? vectype_out
: cvt_type
);
2682 if (modifier
== WIDEN
)
2684 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
2685 if (op_type
== binary_op
)
2686 vec_oprnds1
.create (1);
2688 else if (modifier
== NARROW
)
2689 vec_oprnds0
.create (
2690 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
2692 else if (code
== WIDEN_LSHIFT_EXPR
)
2693 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
2696 prev_stmt_info
= NULL
;
2700 for (j
= 0; j
< ncopies
; j
++)
2703 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2706 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2708 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2710 /* Arguments are ready, create the new vector stmt. */
2711 if (code1
== CALL_EXPR
)
2713 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2714 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2715 gimple_call_set_lhs (new_stmt
, new_temp
);
2719 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2720 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2722 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2723 gimple_assign_set_lhs (new_stmt
, new_temp
);
2726 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2728 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2732 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2734 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2735 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2740 /* In case the vectorization factor (VF) is bigger than the number
2741 of elements that we can fit in a vectype (nunits), we have to
2742 generate more than one vector stmt - i.e - we need to "unroll"
2743 the vector stmt by a factor VF/nunits. */
2744 for (j
= 0; j
< ncopies
; j
++)
2751 if (code
== WIDEN_LSHIFT_EXPR
)
2756 /* Store vec_oprnd1 for every vector stmt to be created
2757 for SLP_NODE. We check during the analysis that all
2758 the shift arguments are the same. */
2759 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2760 vec_oprnds1
.quick_push (vec_oprnd1
);
2762 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2766 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2767 &vec_oprnds1
, slp_node
, -1);
2771 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2772 vec_oprnds0
.quick_push (vec_oprnd0
);
2773 if (op_type
== binary_op
)
2775 if (code
== WIDEN_LSHIFT_EXPR
)
2778 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2780 vec_oprnds1
.quick_push (vec_oprnd1
);
2786 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2787 vec_oprnds0
.truncate (0);
2788 vec_oprnds0
.quick_push (vec_oprnd0
);
2789 if (op_type
== binary_op
)
2791 if (code
== WIDEN_LSHIFT_EXPR
)
2794 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2796 vec_oprnds1
.truncate (0);
2797 vec_oprnds1
.quick_push (vec_oprnd1
);
2801 /* Arguments are ready. Create the new vector stmts. */
2802 for (i
= multi_step_cvt
; i
>= 0; i
--)
2804 tree this_dest
= vec_dsts
[i
];
2805 enum tree_code c1
= code1
, c2
= code2
;
2806 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2811 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2813 stmt
, this_dest
, gsi
,
2814 c1
, c2
, decl1
, decl2
,
2818 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2822 if (codecvt1
== CALL_EXPR
)
2824 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2825 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2826 gimple_call_set_lhs (new_stmt
, new_temp
);
2830 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2831 new_temp
= make_ssa_name (vec_dest
, NULL
);
2832 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2837 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2840 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2843 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2846 if (!prev_stmt_info
)
2847 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2849 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2850 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2855 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2859 /* In case the vectorization factor (VF) is bigger than the number
2860 of elements that we can fit in a vectype (nunits), we have to
2861 generate more than one vector stmt - i.e - we need to "unroll"
2862 the vector stmt by a factor VF/nunits. */
2863 for (j
= 0; j
< ncopies
; j
++)
2867 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2871 vec_oprnds0
.truncate (0);
2872 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2873 vect_pow2 (multi_step_cvt
) - 1);
2876 /* Arguments are ready. Create the new vector stmts. */
2878 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2880 if (codecvt1
== CALL_EXPR
)
2882 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2883 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2884 gimple_call_set_lhs (new_stmt
, new_temp
);
2888 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2889 new_temp
= make_ssa_name (vec_dest
, NULL
);
2890 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2894 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2895 vec_oprnds0
[i
] = new_temp
;
2898 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2899 stmt
, vec_dsts
, gsi
,
2904 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2908 vec_oprnds0
.release ();
2909 vec_oprnds1
.release ();
2910 vec_dsts
.release ();
2911 interm_types
.release ();
2917 /* Function vectorizable_assignment.
2919 Check if STMT performs an assignment (copy) that can be vectorized.
2920 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2921 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2922 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2925 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2926 gimple
*vec_stmt
, slp_tree slp_node
)
2931 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2932 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2933 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2937 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2938 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2941 vec
<tree
> vec_oprnds
= vNULL
;
2943 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2944 gimple new_stmt
= NULL
;
2945 stmt_vec_info prev_stmt_info
= NULL
;
2946 enum tree_code code
;
2949 /* Multiple types in SLP are handled by creating the appropriate number of
2950 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2952 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2955 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2957 gcc_assert (ncopies
>= 1);
2959 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2962 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2965 /* Is vectorizable assignment? */
2966 if (!is_gimple_assign (stmt
))
2969 scalar_dest
= gimple_assign_lhs (stmt
);
2970 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2973 code
= gimple_assign_rhs_code (stmt
);
2974 if (gimple_assign_single_p (stmt
)
2975 || code
== PAREN_EXPR
2976 || CONVERT_EXPR_CODE_P (code
))
2977 op
= gimple_assign_rhs1 (stmt
);
2981 if (code
== VIEW_CONVERT_EXPR
)
2982 op
= TREE_OPERAND (op
, 0);
2984 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2985 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2987 if (dump_enabled_p ())
2988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2989 "use not simple.\n");
2993 /* We can handle NOP_EXPR conversions that do not change the number
2994 of elements or the vector size. */
2995 if ((CONVERT_EXPR_CODE_P (code
)
2996 || code
== VIEW_CONVERT_EXPR
)
2998 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2999 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
3000 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
3003 /* We do not handle bit-precision changes. */
3004 if ((CONVERT_EXPR_CODE_P (code
)
3005 || code
== VIEW_CONVERT_EXPR
)
3006 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
3007 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3008 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3009 || ((TYPE_PRECISION (TREE_TYPE (op
))
3010 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
3011 /* But a conversion that does not change the bit-pattern is ok. */
3012 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3013 > TYPE_PRECISION (TREE_TYPE (op
)))
3014 && TYPE_UNSIGNED (TREE_TYPE (op
))))
3016 if (dump_enabled_p ())
3017 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3018 "type conversion to/from bit-precision "
3023 if (!vec_stmt
) /* transformation not required. */
3025 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
3026 if (dump_enabled_p ())
3027 dump_printf_loc (MSG_NOTE
, vect_location
,
3028 "=== vectorizable_assignment ===\n");
3029 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3034 if (dump_enabled_p ())
3035 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
3038 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3041 for (j
= 0; j
< ncopies
; j
++)
3045 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
3047 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
3049 /* Arguments are ready. create the new vector stmt. */
3050 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3052 if (CONVERT_EXPR_CODE_P (code
)
3053 || code
== VIEW_CONVERT_EXPR
)
3054 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
3055 new_stmt
= gimple_build_assign (vec_dest
, vop
);
3056 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3057 gimple_assign_set_lhs (new_stmt
, new_temp
);
3058 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3060 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3067 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3069 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3071 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3074 vec_oprnds
.release ();
3079 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3080 either as shift by a scalar or by a vector. */
3083 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
3086 enum machine_mode vec_mode
;
3091 vectype
= get_vectype_for_scalar_type (scalar_type
);
3095 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3097 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
3099 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3101 || (optab_handler (optab
, TYPE_MODE (vectype
))
3102 == CODE_FOR_nothing
))
3106 vec_mode
= TYPE_MODE (vectype
);
3107 icode
= (int) optab_handler (optab
, vec_mode
);
3108 if (icode
== CODE_FOR_nothing
)
3115 /* Function vectorizable_shift.
3117 Check if STMT performs a shift operation that can be vectorized.
3118 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3119 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3120 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3123 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3124 gimple
*vec_stmt
, slp_tree slp_node
)
3128 tree op0
, op1
= NULL
;
3129 tree vec_oprnd1
= NULL_TREE
;
3130 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3132 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3133 enum tree_code code
;
3134 enum machine_mode vec_mode
;
3138 enum machine_mode optab_op2_mode
;
3141 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3142 gimple new_stmt
= NULL
;
3143 stmt_vec_info prev_stmt_info
;
3150 vec
<tree
> vec_oprnds0
= vNULL
;
3151 vec
<tree
> vec_oprnds1
= vNULL
;
3154 bool scalar_shift_arg
= true;
3155 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3158 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3161 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3164 /* Is STMT a vectorizable binary/unary operation? */
3165 if (!is_gimple_assign (stmt
))
3168 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3171 code
= gimple_assign_rhs_code (stmt
);
3173 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3174 || code
== RROTATE_EXPR
))
3177 scalar_dest
= gimple_assign_lhs (stmt
);
3178 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3179 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3180 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3182 if (dump_enabled_p ())
3183 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3184 "bit-precision shifts not supported.\n");
3188 op0
= gimple_assign_rhs1 (stmt
);
3189 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3190 &def_stmt
, &def
, &dt
[0], &vectype
))
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3194 "use not simple.\n");
3197 /* If op0 is an external or constant def use a vector type with
3198 the same size as the output vector type. */
3200 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3202 gcc_assert (vectype
);
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3207 "no vectype for scalar type\n");
3211 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3212 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3213 if (nunits_out
!= nunits_in
)
3216 op1
= gimple_assign_rhs2 (stmt
);
3217 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3218 &def
, &dt
[1], &op1_vectype
))
3220 if (dump_enabled_p ())
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3222 "use not simple.\n");
3227 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3231 /* Multiple types in SLP are handled by creating the appropriate number of
3232 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3234 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3237 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3239 gcc_assert (ncopies
>= 1);
3241 /* Determine whether the shift amount is a vector, or scalar. If the
3242 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3244 if (dt
[1] == vect_internal_def
&& !slp_node
)
3245 scalar_shift_arg
= false;
3246 else if (dt
[1] == vect_constant_def
3247 || dt
[1] == vect_external_def
3248 || dt
[1] == vect_internal_def
)
3250 /* In SLP, need to check whether the shift count is the same,
3251 in loops if it is a constant or invariant, it is always
3255 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3258 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
3259 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3260 scalar_shift_arg
= false;
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3267 "operand mode requires invariant argument.\n");
3271 /* Vector shifted by vector. */
3272 if (!scalar_shift_arg
)
3274 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3275 if (dump_enabled_p ())
3276 dump_printf_loc (MSG_NOTE
, vect_location
,
3277 "vector/vector shift/rotate found.\n");
3280 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3281 if (op1_vectype
== NULL_TREE
3282 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3286 "unusable type for last operand in"
3287 " vector/vector shift/rotate.\n");
3291 /* See if the machine has a vector shifted by scalar insn and if not
3292 then see if it has a vector shifted by vector insn. */
3295 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3297 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_NOTE
, vect_location
,
3301 "vector/scalar shift/rotate found.\n");
3305 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3307 && (optab_handler (optab
, TYPE_MODE (vectype
))
3308 != CODE_FOR_nothing
))
3310 scalar_shift_arg
= false;
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_NOTE
, vect_location
,
3314 "vector/vector shift/rotate found.\n");
3316 /* Unlike the other binary operators, shifts/rotates have
3317 the rhs being int, instead of the same type as the lhs,
3318 so make sure the scalar is the right type if we are
3319 dealing with vectors of long long/long/short/char. */
3320 if (dt
[1] == vect_constant_def
)
3321 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3322 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3326 && TYPE_MODE (TREE_TYPE (vectype
))
3327 != TYPE_MODE (TREE_TYPE (op1
)))
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3331 "unusable type for last operand in"
3332 " vector/vector shift/rotate.\n");
3335 if (vec_stmt
&& !slp_node
)
3337 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3338 op1
= vect_init_vector (stmt
, op1
,
3339 TREE_TYPE (vectype
), NULL
);
3346 /* Supportable by target? */
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3354 vec_mode
= TYPE_MODE (vectype
);
3355 icode
= (int) optab_handler (optab
, vec_mode
);
3356 if (icode
== CODE_FOR_nothing
)
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3360 "op not supported by target.\n");
3361 /* Check only during analysis. */
3362 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3363 || (vf
< vect_min_worthwhile_factor (code
)
3366 if (dump_enabled_p ())
3367 dump_printf_loc (MSG_NOTE
, vect_location
,
3368 "proceeding using word mode.\n");
3371 /* Worthwhile without SIMD support? Check only during analysis. */
3372 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3373 && vf
< vect_min_worthwhile_factor (code
)
3376 if (dump_enabled_p ())
3377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3378 "not worthwhile without SIMD support.\n");
3382 if (!vec_stmt
) /* transformation not required. */
3384 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE
, vect_location
,
3387 "=== vectorizable_shift ===\n");
3388 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_NOTE
, vect_location
,
3396 "transform binary/unary operation.\n");
3399 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3401 prev_stmt_info
= NULL
;
3402 for (j
= 0; j
< ncopies
; j
++)
3407 if (scalar_shift_arg
)
3409 /* Vector shl and shr insn patterns can be defined with scalar
3410 operand 2 (shift operand). In this case, use constant or loop
3411 invariant op1 directly, without extending it to vector mode
3413 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3414 if (!VECTOR_MODE_P (optab_op2_mode
))
3416 if (dump_enabled_p ())
3417 dump_printf_loc (MSG_NOTE
, vect_location
,
3418 "operand 1 using scalar mode.\n");
3420 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
3421 vec_oprnds1
.quick_push (vec_oprnd1
);
3424 /* Store vec_oprnd1 for every vector stmt to be created
3425 for SLP_NODE. We check during the analysis that all
3426 the shift arguments are the same.
3427 TODO: Allow different constants for different vector
3428 stmts generated for an SLP instance. */
3429 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3430 vec_oprnds1
.quick_push (vec_oprnd1
);
3435 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3436 (a special case for certain kind of vector shifts); otherwise,
3437 operand 1 should be of a vector type (the usual case). */
3439 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3442 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3446 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3448 /* Arguments are ready. Create the new vector stmt. */
3449 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3451 vop1
= vec_oprnds1
[i
];
3452 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3453 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3454 gimple_assign_set_lhs (new_stmt
, new_temp
);
3455 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3457 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3464 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3466 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3467 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3470 vec_oprnds0
.release ();
3471 vec_oprnds1
.release ();
3477 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
3478 gimple_stmt_iterator
*);
3481 /* Function vectorizable_operation.
3483 Check if STMT performs a binary, unary or ternary operation that can
3485 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3486 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3487 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3490 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3491 gimple
*vec_stmt
, slp_tree slp_node
)
3495 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3496 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3498 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3499 enum tree_code code
;
3500 enum machine_mode vec_mode
;
3507 enum vect_def_type dt
[3]
3508 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3509 gimple new_stmt
= NULL
;
3510 stmt_vec_info prev_stmt_info
;
3516 vec
<tree
> vec_oprnds0
= vNULL
;
3517 vec
<tree
> vec_oprnds1
= vNULL
;
3518 vec
<tree
> vec_oprnds2
= vNULL
;
3519 tree vop0
, vop1
, vop2
;
3520 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3523 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3526 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3529 /* Is STMT a vectorizable binary/unary operation? */
3530 if (!is_gimple_assign (stmt
))
3533 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3536 code
= gimple_assign_rhs_code (stmt
);
3538 /* For pointer addition, we should use the normal plus for
3539 the vector addition. */
3540 if (code
== POINTER_PLUS_EXPR
)
3543 /* Support only unary or binary operations. */
3544 op_type
= TREE_CODE_LENGTH (code
);
3545 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3547 if (dump_enabled_p ())
3548 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3549 "num. args = %d (not unary/binary/ternary op).\n",
3554 scalar_dest
= gimple_assign_lhs (stmt
);
3555 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3557 /* Most operations cannot handle bit-precision types without extra
3559 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3560 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3561 /* Exception are bitwise binary operations. */
3562 && code
!= BIT_IOR_EXPR
3563 && code
!= BIT_XOR_EXPR
3564 && code
!= BIT_AND_EXPR
)
3566 if (dump_enabled_p ())
3567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3568 "bit-precision arithmetic not supported.\n");
3572 op0
= gimple_assign_rhs1 (stmt
);
3573 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3574 &def_stmt
, &def
, &dt
[0], &vectype
))
3576 if (dump_enabled_p ())
3577 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3578 "use not simple.\n");
3581 /* If op0 is an external or constant def use a vector type with
3582 the same size as the output vector type. */
3584 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3586 gcc_assert (vectype
);
3589 if (dump_enabled_p ())
3591 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3592 "no vectype for scalar type ");
3593 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3595 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3601 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3602 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3603 if (nunits_out
!= nunits_in
)
3606 if (op_type
== binary_op
|| op_type
== ternary_op
)
3608 op1
= gimple_assign_rhs2 (stmt
);
3609 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3612 if (dump_enabled_p ())
3613 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3614 "use not simple.\n");
3618 if (op_type
== ternary_op
)
3620 op2
= gimple_assign_rhs3 (stmt
);
3621 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3624 if (dump_enabled_p ())
3625 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3626 "use not simple.\n");
3632 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3636 /* Multiple types in SLP are handled by creating the appropriate number of
3637 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3639 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3642 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3644 gcc_assert (ncopies
>= 1);
3646 /* Shifts are handled in vectorizable_shift (). */
3647 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3648 || code
== RROTATE_EXPR
)
3651 /* Supportable by target? */
3653 vec_mode
= TYPE_MODE (vectype
);
3654 if (code
== MULT_HIGHPART_EXPR
)
3656 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3657 icode
= LAST_INSN_CODE
;
3659 icode
= CODE_FOR_nothing
;
3663 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3666 if (dump_enabled_p ())
3667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3671 icode
= (int) optab_handler (optab
, vec_mode
);
3674 if (icode
== CODE_FOR_nothing
)
3676 if (dump_enabled_p ())
3677 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3678 "op not supported by target.\n");
3679 /* Check only during analysis. */
3680 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3681 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3683 if (dump_enabled_p ())
3684 dump_printf_loc (MSG_NOTE
, vect_location
,
3685 "proceeding using word mode.\n");
3688 /* Worthwhile without SIMD support? Check only during analysis. */
3689 if (!VECTOR_MODE_P (vec_mode
)
3691 && vf
< vect_min_worthwhile_factor (code
))
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3695 "not worthwhile without SIMD support.\n");
3699 if (!vec_stmt
) /* transformation not required. */
3701 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3702 if (dump_enabled_p ())
3703 dump_printf_loc (MSG_NOTE
, vect_location
,
3704 "=== vectorizable_operation ===\n");
3705 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3711 if (dump_enabled_p ())
3712 dump_printf_loc (MSG_NOTE
, vect_location
,
3713 "transform binary/unary operation.\n");
3716 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3718 /* In case the vectorization factor (VF) is bigger than the number
3719 of elements that we can fit in a vectype (nunits), we have to generate
3720 more than one vector stmt - i.e - we need to "unroll" the
3721 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3722 from one copy of the vector stmt to the next, in the field
3723 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3724 stages to find the correct vector defs to be used when vectorizing
3725 stmts that use the defs of the current stmt. The example below
3726 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3727 we need to create 4 vectorized stmts):
3729 before vectorization:
3730 RELATED_STMT VEC_STMT
3734 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3736 RELATED_STMT VEC_STMT
3737 VS1_0: vx0 = memref0 VS1_1 -
3738 VS1_1: vx1 = memref1 VS1_2 -
3739 VS1_2: vx2 = memref2 VS1_3 -
3740 VS1_3: vx3 = memref3 - -
3741 S1: x = load - VS1_0
3744 step2: vectorize stmt S2 (done here):
3745 To vectorize stmt S2 we first need to find the relevant vector
3746 def for the first operand 'x'. This is, as usual, obtained from
3747 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3748 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3749 relevant vector def 'vx0'. Having found 'vx0' we can generate
3750 the vector stmt VS2_0, and as usual, record it in the
3751 STMT_VINFO_VEC_STMT of stmt S2.
3752 When creating the second copy (VS2_1), we obtain the relevant vector
3753 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3754 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3755 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3756 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3757 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3758 chain of stmts and pointers:
3759 RELATED_STMT VEC_STMT
3760 VS1_0: vx0 = memref0 VS1_1 -
3761 VS1_1: vx1 = memref1 VS1_2 -
3762 VS1_2: vx2 = memref2 VS1_3 -
3763 VS1_3: vx3 = memref3 - -
3764 S1: x = load - VS1_0
3765 VS2_0: vz0 = vx0 + v1 VS2_1 -
3766 VS2_1: vz1 = vx1 + v1 VS2_2 -
3767 VS2_2: vz2 = vx2 + v1 VS2_3 -
3768 VS2_3: vz3 = vx3 + v1 - -
3769 S2: z = x + 1 - VS2_0 */
3771 prev_stmt_info
= NULL
;
3772 for (j
= 0; j
< ncopies
; j
++)
3777 if (op_type
== binary_op
|| op_type
== ternary_op
)
3778 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3781 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3783 if (op_type
== ternary_op
)
3785 vec_oprnds2
.create (1);
3786 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
3793 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3794 if (op_type
== ternary_op
)
3796 tree vec_oprnd
= vec_oprnds2
.pop ();
3797 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
3802 /* Arguments are ready. Create the new vector stmt. */
3803 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3805 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3806 ? vec_oprnds1
[i
] : NULL_TREE
);
3807 vop2
= ((op_type
== ternary_op
)
3808 ? vec_oprnds2
[i
] : NULL_TREE
);
3809 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
3811 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3812 gimple_assign_set_lhs (new_stmt
, new_temp
);
3813 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3815 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3822 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3824 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3825 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3828 vec_oprnds0
.release ();
3829 vec_oprnds1
.release ();
3830 vec_oprnds2
.release ();
3835 /* A helper function to ensure data reference DR's base alignment
3839 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
3844 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
3846 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3847 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
3849 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
3850 DECL_USER_ALIGN (base_decl
) = 1;
3851 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
3856 /* Function vectorizable_store.
3858 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3860 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3861 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3862 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3865 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3871 tree vec_oprnd
= NULL_TREE
;
3872 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3873 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3874 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3876 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3877 struct loop
*loop
= NULL
;
3878 enum machine_mode vec_mode
;
3880 enum dr_alignment_support alignment_support_scheme
;
3883 enum vect_def_type dt
;
3884 stmt_vec_info prev_stmt_info
= NULL
;
3885 tree dataref_ptr
= NULL_TREE
;
3886 tree dataref_offset
= NULL_TREE
;
3887 gimple ptr_incr
= NULL
;
3888 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3891 gimple next_stmt
, first_stmt
= NULL
;
3892 bool grouped_store
= false;
3893 bool store_lanes_p
= false;
3894 unsigned int group_size
, i
;
3895 vec
<tree
> dr_chain
= vNULL
;
3896 vec
<tree
> oprnds
= vNULL
;
3897 vec
<tree
> result_chain
= vNULL
;
3899 vec
<tree
> vec_oprnds
= vNULL
;
3900 bool slp
= (slp_node
!= NULL
);
3901 unsigned int vec_num
;
3902 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3906 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3908 /* Multiple types in SLP are handled by creating the appropriate number of
3909 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3911 if (slp
|| PURE_SLP_STMT (stmt_info
))
3914 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3916 gcc_assert (ncopies
>= 1);
3918 /* FORNOW. This restriction should be relaxed. */
3919 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3923 "multiple types in nested loop.\n");
3927 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3930 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3933 /* Is vectorizable store? */
3935 if (!is_gimple_assign (stmt
))
3938 scalar_dest
= gimple_assign_lhs (stmt
);
3939 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3940 && is_pattern_stmt_p (stmt_info
))
3941 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3942 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3943 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
3944 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3945 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3946 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3947 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3948 && TREE_CODE (scalar_dest
) != MEM_REF
)
3951 gcc_assert (gimple_assign_single_p (stmt
));
3952 op
= gimple_assign_rhs1 (stmt
);
3953 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3956 if (dump_enabled_p ())
3957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3958 "use not simple.\n");
3962 elem_type
= TREE_TYPE (vectype
);
3963 vec_mode
= TYPE_MODE (vectype
);
3965 /* FORNOW. In some cases can vectorize even if data-type not supported
3966 (e.g. - array initialization with 0). */
3967 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3970 if (!STMT_VINFO_DATA_REF (stmt_info
))
3973 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3974 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3975 size_zero_node
) < 0)
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3979 "negative step for store.\n");
3983 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3985 grouped_store
= true;
3986 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3987 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3989 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3990 if (vect_store_lanes_supported (vectype
, group_size
))
3991 store_lanes_p
= true;
3992 else if (!vect_grouped_store_supported (vectype
, group_size
))
3996 if (first_stmt
== stmt
)
3998 /* STMT is the leader of the group. Check the operands of all the
3999 stmts of the group. */
4000 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
4003 gcc_assert (gimple_assign_single_p (next_stmt
));
4004 op
= gimple_assign_rhs1 (next_stmt
);
4005 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
4006 &def_stmt
, &def
, &dt
))
4008 if (dump_enabled_p ())
4009 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4010 "use not simple.\n");
4013 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4018 if (!vec_stmt
) /* transformation not required. */
4020 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
4021 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
4028 ensure_base_align (stmt_info
, dr
);
4032 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4033 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4035 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
4038 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
4040 /* We vectorize all the stmts of the interleaving group when we
4041 reach the last stmt in the group. */
4042 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
4043 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
4052 grouped_store
= false;
4053 /* VEC_NUM is the number of vect stmts to be created for this
4055 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4056 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4057 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4058 op
= gimple_assign_rhs1 (first_stmt
);
4061 /* VEC_NUM is the number of vect stmts to be created for this
4063 vec_num
= group_size
;
4069 group_size
= vec_num
= 1;
4072 if (dump_enabled_p ())
4073 dump_printf_loc (MSG_NOTE
, vect_location
,
4074 "transform store. ncopies = %d\n", ncopies
);
4076 dr_chain
.create (group_size
);
4077 oprnds
.create (group_size
);
4079 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4080 gcc_assert (alignment_support_scheme
);
4081 /* Targets with store-lane instructions must not require explicit
4083 gcc_assert (!store_lanes_p
4084 || alignment_support_scheme
== dr_aligned
4085 || alignment_support_scheme
== dr_unaligned_supported
);
4088 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4090 aggr_type
= vectype
;
4092 /* In case the vectorization factor (VF) is bigger than the number
4093 of elements that we can fit in a vectype (nunits), we have to generate
4094 more than one vector stmt - i.e - we need to "unroll" the
4095 vector stmt by a factor VF/nunits. For more details see documentation in
4096 vect_get_vec_def_for_copy_stmt. */
4098 /* In case of interleaving (non-unit grouped access):
4105 We create vectorized stores starting from base address (the access of the
4106 first stmt in the chain (S2 in the above example), when the last store stmt
4107 of the chain (S4) is reached:
4110 VS2: &base + vec_size*1 = vx0
4111 VS3: &base + vec_size*2 = vx1
4112 VS4: &base + vec_size*3 = vx3
4114 Then permutation statements are generated:
4116 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4117 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4120 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4121 (the order of the data-refs in the output of vect_permute_store_chain
4122 corresponds to the order of scalar stmts in the interleaving chain - see
4123 the documentation of vect_permute_store_chain()).
4125 In case of both multiple types and interleaving, above vector stores and
4126 permutation stmts are created for every copy. The result vector stmts are
4127 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4128 STMT_VINFO_RELATED_STMT for the next copies.
4131 prev_stmt_info
= NULL
;
4132 for (j
= 0; j
< ncopies
; j
++)
4140 /* Get vectorized arguments for SLP_NODE. */
4141 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4142 NULL
, slp_node
, -1);
4144 vec_oprnd
= vec_oprnds
[0];
4148 /* For interleaved stores we collect vectorized defs for all the
4149 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4150 used as an input to vect_permute_store_chain(), and OPRNDS as
4151 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4153 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4154 OPRNDS are of size 1. */
4155 next_stmt
= first_stmt
;
4156 for (i
= 0; i
< group_size
; i
++)
4158 /* Since gaps are not supported for interleaved stores,
4159 GROUP_SIZE is the exact number of stmts in the chain.
4160 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4161 there is no interleaving, GROUP_SIZE is 1, and only one
4162 iteration of the loop will be executed. */
4163 gcc_assert (next_stmt
4164 && gimple_assign_single_p (next_stmt
));
4165 op
= gimple_assign_rhs1 (next_stmt
);
4167 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4169 dr_chain
.quick_push (vec_oprnd
);
4170 oprnds
.quick_push (vec_oprnd
);
4171 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4175 /* We should have catched mismatched types earlier. */
4176 gcc_assert (useless_type_conversion_p (vectype
,
4177 TREE_TYPE (vec_oprnd
)));
4178 bool simd_lane_access_p
4179 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
4180 if (simd_lane_access_p
4181 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
4182 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
4183 && integer_zerop (DR_OFFSET (first_dr
))
4184 && integer_zerop (DR_INIT (first_dr
))
4185 && alias_sets_conflict_p (get_alias_set (aggr_type
),
4186 get_alias_set (DR_REF (first_dr
))))
4188 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
4189 dataref_offset
= build_int_cst (reference_alias_ptr_type
4190 (DR_REF (first_dr
)), 0);
4195 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
4196 simd_lane_access_p
? loop
: NULL
,
4197 NULL_TREE
, &dummy
, gsi
, &ptr_incr
,
4198 simd_lane_access_p
, &inv_p
);
4199 gcc_assert (bb_vinfo
|| !inv_p
);
4203 /* For interleaved stores we created vectorized defs for all the
4204 defs stored in OPRNDS in the previous iteration (previous copy).
4205 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4206 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4208 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4209 OPRNDS are of size 1. */
4210 for (i
= 0; i
< group_size
; i
++)
4213 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4215 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4216 dr_chain
[i
] = vec_oprnd
;
4217 oprnds
[i
] = vec_oprnd
;
4221 = int_const_binop (PLUS_EXPR
, dataref_offset
,
4222 TYPE_SIZE_UNIT (aggr_type
));
4224 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4225 TYPE_SIZE_UNIT (aggr_type
));
4232 /* Combine all the vectors into an array. */
4233 vec_array
= create_vector_array (vectype
, vec_num
);
4234 for (i
= 0; i
< vec_num
; i
++)
4236 vec_oprnd
= dr_chain
[i
];
4237 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4241 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4242 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4243 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4244 gimple_call_set_lhs (new_stmt
, data_ref
);
4245 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4253 result_chain
.create (group_size
);
4255 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4259 next_stmt
= first_stmt
;
4260 for (i
= 0; i
< vec_num
; i
++)
4262 unsigned align
, misalign
;
4265 /* Bump the vector pointer. */
4266 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4270 vec_oprnd
= vec_oprnds
[i
];
4271 else if (grouped_store
)
4272 /* For grouped stores vectorized defs are interleaved in
4273 vect_permute_store_chain(). */
4274 vec_oprnd
= result_chain
[i
];
4276 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4279 : build_int_cst (reference_alias_ptr_type
4280 (DR_REF (first_dr
)), 0));
4281 align
= TYPE_ALIGN_UNIT (vectype
);
4282 if (aligned_access_p (first_dr
))
4284 else if (DR_MISALIGNMENT (first_dr
) == -1)
4286 TREE_TYPE (data_ref
)
4287 = build_aligned_type (TREE_TYPE (data_ref
),
4288 TYPE_ALIGN (elem_type
));
4289 align
= TYPE_ALIGN_UNIT (elem_type
);
4294 TREE_TYPE (data_ref
)
4295 = build_aligned_type (TREE_TYPE (data_ref
),
4296 TYPE_ALIGN (elem_type
));
4297 misalign
= DR_MISALIGNMENT (first_dr
);
4299 if (dataref_offset
== NULL_TREE
)
4300 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4303 /* Arguments are ready. Create the new vector stmt. */
4304 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4305 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4310 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4318 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4320 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4321 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4325 dr_chain
.release ();
4327 result_chain
.release ();
4328 vec_oprnds
.release ();
4333 /* Given a vector type VECTYPE and permutation SEL returns
4334 the VECTOR_CST mask that implements the permutation of the
4335 vector elements. If that is impossible to do, returns NULL. */
4338 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4340 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4343 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4345 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4348 mask_elt_type
= lang_hooks
.types
.type_for_mode
4349 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4350 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
4352 mask_elts
= XALLOCAVEC (tree
, nunits
);
4353 for (i
= nunits
- 1; i
>= 0; i
--)
4354 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4355 mask_vec
= build_vector (mask_type
, mask_elts
);
4360 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4361 reversal of the vector elements. If that is impossible to do,
4365 perm_mask_for_reverse (tree vectype
)
4370 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4371 sel
= XALLOCAVEC (unsigned char, nunits
);
4373 for (i
= 0; i
< nunits
; ++i
)
4374 sel
[i
] = nunits
- 1 - i
;
4376 return vect_gen_perm_mask (vectype
, sel
);
4379 /* Given a vector variable X and Y, that was generated for the scalar
4380 STMT, generate instructions to permute the vector elements of X and Y
4381 using permutation mask MASK_VEC, insert them at *GSI and return the
4382 permuted vector variable. */
4385 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4386 gimple_stmt_iterator
*gsi
)
4388 tree vectype
= TREE_TYPE (x
);
4389 tree perm_dest
, data_ref
;
4392 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4393 data_ref
= make_ssa_name (perm_dest
, NULL
);
4395 /* Generate the permute statement. */
4396 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
4398 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4403 /* vectorizable_load.
4405 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4407 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4408 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4409 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4412 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4413 slp_tree slp_node
, slp_instance slp_node_instance
)
4416 tree vec_dest
= NULL
;
4417 tree data_ref
= NULL
;
4418 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4419 stmt_vec_info prev_stmt_info
;
4420 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4421 struct loop
*loop
= NULL
;
4422 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4423 bool nested_in_vect_loop
= false;
4424 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4425 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4428 enum machine_mode mode
;
4429 gimple new_stmt
= NULL
;
4431 enum dr_alignment_support alignment_support_scheme
;
4432 tree dataref_ptr
= NULL_TREE
;
4433 tree dataref_offset
= NULL_TREE
;
4434 gimple ptr_incr
= NULL
;
4435 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4437 int i
, j
, group_size
, group_gap
;
4438 tree msq
= NULL_TREE
, lsq
;
4439 tree offset
= NULL_TREE
;
4440 tree realignment_token
= NULL_TREE
;
4442 vec
<tree
> dr_chain
= vNULL
;
4443 bool grouped_load
= false;
4444 bool load_lanes_p
= false;
4447 bool negative
= false;
4448 bool compute_in_loop
= false;
4449 struct loop
*at_loop
;
4451 bool slp
= (slp_node
!= NULL
);
4452 bool slp_perm
= false;
4453 enum tree_code code
;
4454 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4457 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4458 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4459 int gather_scale
= 1;
4460 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4464 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4465 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4466 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4471 /* Multiple types in SLP are handled by creating the appropriate number of
4472 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4474 if (slp
|| PURE_SLP_STMT (stmt_info
))
4477 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4479 gcc_assert (ncopies
>= 1);
4481 /* FORNOW. This restriction should be relaxed. */
4482 if (nested_in_vect_loop
&& ncopies
> 1)
4484 if (dump_enabled_p ())
4485 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4486 "multiple types in nested loop.\n");
4490 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4493 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4496 /* Is vectorizable load? */
4497 if (!is_gimple_assign (stmt
))
4500 scalar_dest
= gimple_assign_lhs (stmt
);
4501 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4504 code
= gimple_assign_rhs_code (stmt
);
4505 if (code
!= ARRAY_REF
4506 && code
!= BIT_FIELD_REF
4507 && code
!= INDIRECT_REF
4508 && code
!= COMPONENT_REF
4509 && code
!= IMAGPART_EXPR
4510 && code
!= REALPART_EXPR
4512 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4515 if (!STMT_VINFO_DATA_REF (stmt_info
))
4518 elem_type
= TREE_TYPE (vectype
);
4519 mode
= TYPE_MODE (vectype
);
4521 /* FORNOW. In some cases can vectorize even if data-type not supported
4522 (e.g. - data copies). */
4523 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4525 if (dump_enabled_p ())
4526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4527 "Aligned load, but unsupported type.\n");
4531 /* Check if the load is a part of an interleaving chain. */
4532 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4534 grouped_load
= true;
4536 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4538 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4539 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4541 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4542 if (vect_load_lanes_supported (vectype
, group_size
))
4543 load_lanes_p
= true;
4544 else if (!vect_grouped_load_supported (vectype
, group_size
))
4550 if (STMT_VINFO_GATHER_P (stmt_info
))
4554 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4555 &gather_off
, &gather_scale
);
4556 gcc_assert (gather_decl
);
4557 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4558 &def_stmt
, &def
, &gather_dt
,
4559 &gather_off_vectype
))
4561 if (dump_enabled_p ())
4562 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4563 "gather index use not simple.\n");
4567 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4571 negative
= tree_int_cst_compare (nested_in_vect_loop
4572 ? STMT_VINFO_DR_STEP (stmt_info
)
4574 size_zero_node
) < 0;
4575 if (negative
&& ncopies
> 1)
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4579 "multiple types with negative step.\n");
4587 if (dump_enabled_p ())
4588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4589 "negative step for group load not supported"
4593 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4594 if (alignment_support_scheme
!= dr_aligned
4595 && alignment_support_scheme
!= dr_unaligned_supported
)
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4599 "negative step but alignment required.\n");
4602 if (!perm_mask_for_reverse (vectype
))
4604 if (dump_enabled_p ())
4605 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4606 "negative step and reversing not supported."
4613 if (!vec_stmt
) /* transformation not required. */
4615 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4616 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4620 if (dump_enabled_p ())
4621 dump_printf_loc (MSG_NOTE
, vect_location
,
4622 "transform load. ncopies = %d\n", ncopies
);
4626 ensure_base_align (stmt_info
, dr
);
4628 if (STMT_VINFO_GATHER_P (stmt_info
))
4630 tree vec_oprnd0
= NULL_TREE
, op
;
4631 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4632 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4633 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4634 edge pe
= loop_preheader_edge (loop
);
4637 enum { NARROW
, NONE
, WIDEN
} modifier
;
4638 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4640 if (nunits
== gather_off_nunits
)
4642 else if (nunits
== gather_off_nunits
/ 2)
4644 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4647 for (i
= 0; i
< gather_off_nunits
; ++i
)
4648 sel
[i
] = i
| nunits
;
4650 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4651 gcc_assert (perm_mask
!= NULL_TREE
);
4653 else if (nunits
== gather_off_nunits
* 2)
4655 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4658 for (i
= 0; i
< nunits
; ++i
)
4659 sel
[i
] = i
< gather_off_nunits
4660 ? i
: i
+ nunits
- gather_off_nunits
;
4662 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4663 gcc_assert (perm_mask
!= NULL_TREE
);
4669 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4670 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4671 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4672 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4673 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4674 scaletype
= TREE_VALUE (arglist
);
4675 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4676 && types_compatible_p (srctype
, masktype
));
4678 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4680 ptr
= fold_convert (ptrtype
, gather_base
);
4681 if (!is_gimple_min_invariant (ptr
))
4683 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4684 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4685 gcc_assert (!new_bb
);
4688 /* Currently we support only unconditional gather loads,
4689 so mask should be all ones. */
4690 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4691 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4692 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4696 for (j
= 0; j
< 6; ++j
)
4698 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4699 mask
= build_real (TREE_TYPE (masktype
), r
);
4703 mask
= build_vector_from_val (masktype
, mask
);
4704 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4706 scale
= build_int_cst (scaletype
, gather_scale
);
4708 prev_stmt_info
= NULL
;
4709 for (j
= 0; j
< ncopies
; ++j
)
4711 if (modifier
== WIDEN
&& (j
& 1))
4712 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4713 perm_mask
, stmt
, gsi
);
4716 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4719 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4721 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4723 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4724 == TYPE_VECTOR_SUBPARTS (idxtype
));
4725 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4726 var
= make_ssa_name (var
, NULL
);
4727 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4729 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4731 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4736 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4738 if (!useless_type_conversion_p (vectype
, rettype
))
4740 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4741 == TYPE_VECTOR_SUBPARTS (rettype
));
4742 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4743 op
= make_ssa_name (var
, new_stmt
);
4744 gimple_call_set_lhs (new_stmt
, op
);
4745 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4746 var
= make_ssa_name (vec_dest
, NULL
);
4747 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4749 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4754 var
= make_ssa_name (vec_dest
, new_stmt
);
4755 gimple_call_set_lhs (new_stmt
, var
);
4758 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4760 if (modifier
== NARROW
)
4767 var
= permute_vec_elements (prev_res
, var
,
4768 perm_mask
, stmt
, gsi
);
4769 new_stmt
= SSA_NAME_DEF_STMT (var
);
4772 if (prev_stmt_info
== NULL
)
4773 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4775 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4776 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4780 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4782 gimple_stmt_iterator incr_gsi
;
4788 vec
<constructor_elt
, va_gc
> *v
= NULL
;
4789 gimple_seq stmts
= NULL
;
4790 tree stride_base
, stride_step
, alias_off
;
4792 gcc_assert (!nested_in_vect_loop
);
4795 = fold_build_pointer_plus
4796 (unshare_expr (DR_BASE_ADDRESS (dr
)),
4797 size_binop (PLUS_EXPR
,
4798 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
4799 convert_to_ptrofftype (DR_INIT (dr
))));
4800 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
4802 /* For a load with loop-invariant (but other than power-of-2)
4803 stride (i.e. not a grouped access) like so:
4805 for (i = 0; i < n; i += stride)
4808 we generate a new induction variable and new accesses to
4809 form a new vector (or vectors, depending on ncopies):
4811 for (j = 0; ; j += VF*stride)
4813 tmp2 = array[j + stride];
4815 vectemp = {tmp1, tmp2, ...}
4818 ivstep
= stride_step
;
4819 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4820 build_int_cst (TREE_TYPE (ivstep
), vf
));
4822 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4824 create_iv (stride_base
, ivstep
, NULL
,
4825 loop
, &incr_gsi
, insert_after
,
4827 incr
= gsi_stmt (incr_gsi
);
4828 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4830 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4832 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4834 prev_stmt_info
= NULL
;
4835 running_off
= offvar
;
4836 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
4837 for (j
= 0; j
< ncopies
; j
++)
4841 vec_alloc (v
, nunits
);
4842 for (i
= 0; i
< nunits
; i
++)
4844 tree newref
, newoff
;
4846 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
4847 running_off
, alias_off
);
4849 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4852 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4853 newoff
= copy_ssa_name (running_off
, NULL
);
4854 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4855 running_off
, stride_step
);
4856 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4858 running_off
= newoff
;
4861 vec_inv
= build_constructor (vectype
, v
);
4862 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4863 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4866 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4868 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4869 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4876 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4878 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
4879 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
4880 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4882 /* Check if the chain of loads is already vectorized. */
4883 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
4884 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4885 ??? But we can only do so if there is exactly one
4886 as we have no way to get at the rest. Leave the CSE
4888 ??? With the group load eventually participating
4889 in multiple different permutations (having multiple
4890 slp nodes which refer to the same group) the CSE
4891 is even wrong code. See PR56270. */
4894 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4897 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4898 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4900 /* VEC_NUM is the number of vect stmts to be created for this group. */
4903 grouped_load
= false;
4904 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4905 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
4907 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
4911 vec_num
= group_size
;
4919 group_size
= vec_num
= 1;
4923 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4924 gcc_assert (alignment_support_scheme
);
4925 /* Targets with load-lane instructions must not require explicit
4927 gcc_assert (!load_lanes_p
4928 || alignment_support_scheme
== dr_aligned
4929 || alignment_support_scheme
== dr_unaligned_supported
);
4931 /* In case the vectorization factor (VF) is bigger than the number
4932 of elements that we can fit in a vectype (nunits), we have to generate
4933 more than one vector stmt - i.e - we need to "unroll" the
4934 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4935 from one copy of the vector stmt to the next, in the field
4936 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4937 stages to find the correct vector defs to be used when vectorizing
4938 stmts that use the defs of the current stmt. The example below
4939 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4940 need to create 4 vectorized stmts):
4942 before vectorization:
4943 RELATED_STMT VEC_STMT
4947 step 1: vectorize stmt S1:
4948 We first create the vector stmt VS1_0, and, as usual, record a
4949 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4950 Next, we create the vector stmt VS1_1, and record a pointer to
4951 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4952 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4954 RELATED_STMT VEC_STMT
4955 VS1_0: vx0 = memref0 VS1_1 -
4956 VS1_1: vx1 = memref1 VS1_2 -
4957 VS1_2: vx2 = memref2 VS1_3 -
4958 VS1_3: vx3 = memref3 - -
4959 S1: x = load - VS1_0
4962 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4963 information we recorded in RELATED_STMT field is used to vectorize
4966 /* In case of interleaving (non-unit grouped access):
4973 Vectorized loads are created in the order of memory accesses
4974 starting from the access of the first stmt of the chain:
4977 VS2: vx1 = &base + vec_size*1
4978 VS3: vx3 = &base + vec_size*2
4979 VS4: vx4 = &base + vec_size*3
4981 Then permutation statements are generated:
4983 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4984 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4987 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4988 (the order of the data-refs in the output of vect_permute_load_chain
4989 corresponds to the order of scalar stmts in the interleaving chain - see
4990 the documentation of vect_permute_load_chain()).
4991 The generation of permutation stmts and recording them in
4992 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4994 In case of both multiple types and interleaving, the vector loads and
4995 permutation stmts above are created for every copy. The result vector
4996 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4997 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4999 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5000 on a target that supports unaligned accesses (dr_unaligned_supported)
5001 we generate the following code:
5005 p = p + indx * vectype_size;
5010 Otherwise, the data reference is potentially unaligned on a target that
5011 does not support unaligned accesses (dr_explicit_realign_optimized) -
5012 then generate the following code, in which the data in each iteration is
5013 obtained by two vector loads, one from the previous iteration, and one
5014 from the current iteration:
5016 msq_init = *(floor(p1))
5017 p2 = initial_addr + VS - 1;
5018 realignment_token = call target_builtin;
5021 p2 = p2 + indx * vectype_size
5023 vec_dest = realign_load (msq, lsq, realignment_token)
5028 /* If the misalignment remains the same throughout the execution of the
5029 loop, we can create the init_addr and permutation mask at the loop
5030 preheader. Otherwise, it needs to be created inside the loop.
5031 This can only occur when vectorizing memory accesses in the inner-loop
5032 nested within an outer-loop that is being vectorized. */
5034 if (nested_in_vect_loop
5035 && (TREE_INT_CST_LOW (DR_STEP (dr
))
5036 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
5038 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
5039 compute_in_loop
= true;
5042 if ((alignment_support_scheme
== dr_explicit_realign_optimized
5043 || alignment_support_scheme
== dr_explicit_realign
)
5044 && !compute_in_loop
)
5046 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
5047 alignment_support_scheme
, NULL_TREE
,
5049 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5051 phi
= SSA_NAME_DEF_STMT (msq
);
5052 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5059 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5062 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5064 aggr_type
= vectype
;
5066 prev_stmt_info
= NULL
;
5067 for (j
= 0; j
< ncopies
; j
++)
5069 /* 1. Create the vector or array pointer update chain. */
5072 bool simd_lane_access_p
5073 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5074 if (simd_lane_access_p
5075 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5076 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5077 && integer_zerop (DR_OFFSET (first_dr
))
5078 && integer_zerop (DR_INIT (first_dr
))
5079 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5080 get_alias_set (DR_REF (first_dr
)))
5081 && (alignment_support_scheme
== dr_aligned
5082 || alignment_support_scheme
== dr_unaligned_supported
))
5084 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5085 dataref_offset
= build_int_cst (reference_alias_ptr_type
5086 (DR_REF (first_dr
)), 0);
5091 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
5092 offset
, &dummy
, gsi
, &ptr_incr
,
5093 simd_lane_access_p
, &inv_p
);
5095 else if (dataref_offset
)
5096 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
5097 TYPE_SIZE_UNIT (aggr_type
));
5099 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5100 TYPE_SIZE_UNIT (aggr_type
));
5102 if (grouped_load
|| slp_perm
)
5103 dr_chain
.create (vec_num
);
5109 vec_array
= create_vector_array (vectype
, vec_num
);
5112 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5113 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5114 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
5115 gimple_call_set_lhs (new_stmt
, vec_array
);
5116 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5118 /* Extract each vector into an SSA_NAME. */
5119 for (i
= 0; i
< vec_num
; i
++)
5121 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
5123 dr_chain
.quick_push (new_temp
);
5126 /* Record the mapping between SSA_NAMEs and statements. */
5127 vect_record_grouped_load_vectors (stmt
, dr_chain
);
5131 for (i
= 0; i
< vec_num
; i
++)
5134 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5137 /* 2. Create the vector-load in the loop. */
5138 switch (alignment_support_scheme
)
5141 case dr_unaligned_supported
:
5143 unsigned int align
, misalign
;
5146 = build2 (MEM_REF
, vectype
, dataref_ptr
,
5149 : build_int_cst (reference_alias_ptr_type
5150 (DR_REF (first_dr
)), 0));
5151 align
= TYPE_ALIGN_UNIT (vectype
);
5152 if (alignment_support_scheme
== dr_aligned
)
5154 gcc_assert (aligned_access_p (first_dr
));
5157 else if (DR_MISALIGNMENT (first_dr
) == -1)
5159 TREE_TYPE (data_ref
)
5160 = build_aligned_type (TREE_TYPE (data_ref
),
5161 TYPE_ALIGN (elem_type
));
5162 align
= TYPE_ALIGN_UNIT (elem_type
);
5167 TREE_TYPE (data_ref
)
5168 = build_aligned_type (TREE_TYPE (data_ref
),
5169 TYPE_ALIGN (elem_type
));
5170 misalign
= DR_MISALIGNMENT (first_dr
);
5172 if (dataref_offset
== NULL_TREE
)
5173 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
5177 case dr_explicit_realign
:
5182 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5184 if (compute_in_loop
)
5185 msq
= vect_setup_realignment (first_stmt
, gsi
,
5187 dr_explicit_realign
,
5190 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
5191 new_stmt
= gimple_build_assign_with_ops
5192 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
5194 (TREE_TYPE (dataref_ptr
),
5195 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5196 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5198 = build2 (MEM_REF
, vectype
, ptr
,
5199 build_int_cst (reference_alias_ptr_type
5200 (DR_REF (first_dr
)), 0));
5201 vec_dest
= vect_create_destination_var (scalar_dest
,
5203 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5204 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5205 gimple_assign_set_lhs (new_stmt
, new_temp
);
5206 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
5207 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
5208 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5211 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5212 TYPE_SIZE_UNIT (elem_type
));
5213 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5214 new_stmt
= gimple_build_assign_with_ops
5215 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5218 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5219 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
5220 gimple_assign_set_lhs (new_stmt
, ptr
);
5221 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5223 = build2 (MEM_REF
, vectype
, ptr
,
5224 build_int_cst (reference_alias_ptr_type
5225 (DR_REF (first_dr
)), 0));
5228 case dr_explicit_realign_optimized
:
5229 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5230 new_stmt
= gimple_build_assign_with_ops
5231 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5233 (TREE_TYPE (dataref_ptr
),
5234 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5235 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5237 = build2 (MEM_REF
, vectype
, new_temp
,
5238 build_int_cst (reference_alias_ptr_type
5239 (DR_REF (first_dr
)), 0));
5244 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5245 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5246 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5247 gimple_assign_set_lhs (new_stmt
, new_temp
);
5248 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5250 /* 3. Handle explicit realignment if necessary/supported.
5252 vec_dest = realign_load (msq, lsq, realignment_token) */
5253 if (alignment_support_scheme
== dr_explicit_realign_optimized
5254 || alignment_support_scheme
== dr_explicit_realign
)
5256 lsq
= gimple_assign_lhs (new_stmt
);
5257 if (!realignment_token
)
5258 realignment_token
= dataref_ptr
;
5259 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5261 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
5264 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5265 gimple_assign_set_lhs (new_stmt
, new_temp
);
5266 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5268 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5271 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5272 add_phi_arg (phi
, lsq
,
5273 loop_latch_edge (containing_loop
),
5279 /* 4. Handle invariant-load. */
5280 if (inv_p
&& !bb_vinfo
)
5282 gimple_stmt_iterator gsi2
= *gsi
;
5283 gcc_assert (!grouped_load
);
5285 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5287 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5292 tree perm_mask
= perm_mask_for_reverse (vectype
);
5293 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5294 perm_mask
, stmt
, gsi
);
5295 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5298 /* Collect vector loads and later create their permutation in
5299 vect_transform_grouped_load (). */
5300 if (grouped_load
|| slp_perm
)
5301 dr_chain
.quick_push (new_temp
);
5303 /* Store vector loads in the corresponding SLP_NODE. */
5304 if (slp
&& !slp_perm
)
5305 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5307 /* Bump the vector pointer to account for a gap. */
5308 if (slp
&& group_gap
!= 0)
5310 tree bump
= size_binop (MULT_EXPR
,
5311 TYPE_SIZE_UNIT (elem_type
),
5312 size_int (group_gap
));
5313 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5318 if (slp
&& !slp_perm
)
5323 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
5324 slp_node_instance
, false))
5326 dr_chain
.release ();
5335 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5336 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5341 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5343 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5344 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5347 dr_chain
.release ();
5353 /* Function vect_is_simple_cond.
5356 LOOP - the loop that is being vectorized.
5357 COND - Condition that is checked for simple use.
5360 *COMP_VECTYPE - the vector type for the comparison.
5362 Returns whether a COND can be vectorized. Checks whether
5363 condition operands are supportable using vec_is_simple_use. */
5366 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5367 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5371 enum vect_def_type dt
;
5372 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5374 if (!COMPARISON_CLASS_P (cond
))
5377 lhs
= TREE_OPERAND (cond
, 0);
5378 rhs
= TREE_OPERAND (cond
, 1);
5380 if (TREE_CODE (lhs
) == SSA_NAME
)
5382 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5383 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5384 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5387 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5388 && TREE_CODE (lhs
) != FIXED_CST
)
5391 if (TREE_CODE (rhs
) == SSA_NAME
)
5393 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5394 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5395 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5398 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5399 && TREE_CODE (rhs
) != FIXED_CST
)
5402 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5406 /* vectorizable_condition.
5408 Check if STMT is conditional modify expression that can be vectorized.
5409 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5410 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5413 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5414 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5415 else caluse if it is 2).
5417 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5420 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5421 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5424 tree scalar_dest
= NULL_TREE
;
5425 tree vec_dest
= NULL_TREE
;
5426 tree cond_expr
, then_clause
, else_clause
;
5427 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5428 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5429 tree comp_vectype
= NULL_TREE
;
5430 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5431 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5432 tree vec_compare
, vec_cond_expr
;
5434 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5436 enum vect_def_type dt
, dts
[4];
5437 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5439 enum tree_code code
;
5440 stmt_vec_info prev_stmt_info
= NULL
;
5442 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5443 vec
<tree
> vec_oprnds0
= vNULL
;
5444 vec
<tree
> vec_oprnds1
= vNULL
;
5445 vec
<tree
> vec_oprnds2
= vNULL
;
5446 vec
<tree
> vec_oprnds3
= vNULL
;
5449 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5452 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5454 gcc_assert (ncopies
>= 1);
5455 if (reduc_index
&& ncopies
> 1)
5456 return false; /* FORNOW */
5458 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5461 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5464 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5465 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5469 /* FORNOW: not yet supported. */
5470 if (STMT_VINFO_LIVE_P (stmt_info
))
5472 if (dump_enabled_p ())
5473 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5474 "value used after loop.\n");
5478 /* Is vectorizable conditional operation? */
5479 if (!is_gimple_assign (stmt
))
5482 code
= gimple_assign_rhs_code (stmt
);
5484 if (code
!= COND_EXPR
)
5487 cond_expr
= gimple_assign_rhs1 (stmt
);
5488 then_clause
= gimple_assign_rhs2 (stmt
);
5489 else_clause
= gimple_assign_rhs3 (stmt
);
5491 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5496 if (TREE_CODE (then_clause
) == SSA_NAME
)
5498 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5499 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5500 &then_def_stmt
, &def
, &dt
))
5503 else if (TREE_CODE (then_clause
) != INTEGER_CST
5504 && TREE_CODE (then_clause
) != REAL_CST
5505 && TREE_CODE (then_clause
) != FIXED_CST
)
5508 if (TREE_CODE (else_clause
) == SSA_NAME
)
5510 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5511 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5512 &else_def_stmt
, &def
, &dt
))
5515 else if (TREE_CODE (else_clause
) != INTEGER_CST
5516 && TREE_CODE (else_clause
) != REAL_CST
5517 && TREE_CODE (else_clause
) != FIXED_CST
)
5520 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
5521 /* The result of a vector comparison should be signed type. */
5522 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
5523 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
5524 if (vec_cmp_type
== NULL_TREE
)
5529 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5530 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5537 vec_oprnds0
.create (1);
5538 vec_oprnds1
.create (1);
5539 vec_oprnds2
.create (1);
5540 vec_oprnds3
.create (1);
5544 scalar_dest
= gimple_assign_lhs (stmt
);
5545 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5547 /* Handle cond expr. */
5548 for (j
= 0; j
< ncopies
; j
++)
5550 gimple new_stmt
= NULL
;
5557 vec
<vec
<tree
> > vec_defs
;
5559 vec_defs
.create (4);
5560 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
5561 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
5562 ops
.safe_push (then_clause
);
5563 ops
.safe_push (else_clause
);
5564 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5565 vec_oprnds3
= vec_defs
.pop ();
5566 vec_oprnds2
= vec_defs
.pop ();
5567 vec_oprnds1
= vec_defs
.pop ();
5568 vec_oprnds0
= vec_defs
.pop ();
5571 vec_defs
.release ();
5577 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5579 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5580 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5583 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5585 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5586 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5587 if (reduc_index
== 1)
5588 vec_then_clause
= reduc_def
;
5591 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5593 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5594 NULL
, >emp
, &def
, &dts
[2]);
5596 if (reduc_index
== 2)
5597 vec_else_clause
= reduc_def
;
5600 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5602 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5603 NULL
, >emp
, &def
, &dts
[3]);
5609 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5610 vec_oprnds0
.pop ());
5611 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5612 vec_oprnds1
.pop ());
5613 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5614 vec_oprnds2
.pop ());
5615 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5616 vec_oprnds3
.pop ());
5621 vec_oprnds0
.quick_push (vec_cond_lhs
);
5622 vec_oprnds1
.quick_push (vec_cond_rhs
);
5623 vec_oprnds2
.quick_push (vec_then_clause
);
5624 vec_oprnds3
.quick_push (vec_else_clause
);
5627 /* Arguments are ready. Create the new vector stmt. */
5628 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
5630 vec_cond_rhs
= vec_oprnds1
[i
];
5631 vec_then_clause
= vec_oprnds2
[i
];
5632 vec_else_clause
= vec_oprnds3
[i
];
5634 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
5635 vec_cond_lhs
, vec_cond_rhs
);
5636 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5637 vec_compare
, vec_then_clause
, vec_else_clause
);
5639 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5640 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5641 gimple_assign_set_lhs (new_stmt
, new_temp
);
5642 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5644 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5651 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5653 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5655 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5658 vec_oprnds0
.release ();
5659 vec_oprnds1
.release ();
5660 vec_oprnds2
.release ();
5661 vec_oprnds3
.release ();
5667 /* Make sure the statement is vectorizable. */
5670 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5672 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5673 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5674 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5676 tree scalar_type
, vectype
;
5677 gimple pattern_stmt
;
5678 gimple_seq pattern_def_seq
;
5680 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
5683 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5684 dump_printf (MSG_NOTE
, "\n");
5687 if (gimple_has_volatile_ops (stmt
))
5689 if (dump_enabled_p ())
5690 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5691 "not vectorized: stmt has volatile operands\n");
5696 /* Skip stmts that do not need to be vectorized. In loops this is expected
5698 - the COND_EXPR which is the loop exit condition
5699 - any LABEL_EXPRs in the loop
5700 - computations that are used only for array indexing or loop control.
5701 In basic blocks we only analyze statements that are a part of some SLP
5702 instance, therefore, all the statements are relevant.
5704 Pattern statement needs to be analyzed instead of the original statement
5705 if the original statement is not relevant. Otherwise, we analyze both
5706 statements. In basic blocks we are called from some SLP instance
5707 traversal, don't analyze pattern stmts instead, the pattern stmts
5708 already will be part of SLP instance. */
5710 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5711 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5712 && !STMT_VINFO_LIVE_P (stmt_info
))
5714 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5716 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5717 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5719 /* Analyze PATTERN_STMT instead of the original stmt. */
5720 stmt
= pattern_stmt
;
5721 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5722 if (dump_enabled_p ())
5724 dump_printf_loc (MSG_NOTE
, vect_location
,
5725 "==> examining pattern statement: ");
5726 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5727 dump_printf (MSG_NOTE
, "\n");
5732 if (dump_enabled_p ())
5733 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
5738 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5741 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5742 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5744 /* Analyze PATTERN_STMT too. */
5745 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_NOTE
, vect_location
,
5748 "==> examining pattern statement: ");
5749 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5750 dump_printf (MSG_NOTE
, "\n");
5753 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5757 if (is_pattern_stmt_p (stmt_info
)
5759 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5761 gimple_stmt_iterator si
;
5763 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5765 gimple pattern_def_stmt
= gsi_stmt (si
);
5766 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5767 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5769 /* Analyze def stmt of STMT if it's a pattern stmt. */
5770 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_NOTE
, vect_location
,
5773 "==> examining pattern def statement: ");
5774 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
5775 dump_printf (MSG_NOTE
, "\n");
5778 if (!vect_analyze_stmt (pattern_def_stmt
,
5779 need_to_vectorize
, node
))
5785 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5787 case vect_internal_def
:
5790 case vect_reduction_def
:
5791 case vect_nested_cycle
:
5792 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5793 || relevance
== vect_used_in_outer_by_reduction
5794 || relevance
== vect_unused_in_scope
));
5797 case vect_induction_def
:
5798 case vect_constant_def
:
5799 case vect_external_def
:
5800 case vect_unknown_def_type
:
5807 gcc_assert (PURE_SLP_STMT (stmt_info
));
5809 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5810 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE
, vect_location
,
5813 "get vectype for scalar type: ");
5814 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
5815 dump_printf (MSG_NOTE
, "\n");
5818 vectype
= get_vectype_for_scalar_type (scalar_type
);
5821 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5824 "not SLPed: unsupported data-type ");
5825 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5827 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5832 if (dump_enabled_p ())
5834 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
5835 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
5836 dump_printf (MSG_NOTE
, "\n");
5839 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5842 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5844 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5845 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5846 *need_to_vectorize
= true;
5851 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5852 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5853 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5854 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5855 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5856 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5857 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5858 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5859 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5860 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5861 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5865 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5866 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5867 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5868 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5869 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5870 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5871 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5872 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5877 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5880 "not vectorized: relevant stmt not ");
5881 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5882 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5883 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5892 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5893 need extra handling, except for vectorizable reductions. */
5894 if (STMT_VINFO_LIVE_P (stmt_info
)
5895 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5896 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5900 if (dump_enabled_p ())
5902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5903 "not vectorized: live stmt not ");
5904 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5905 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5906 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5916 /* Function vect_transform_stmt.
5918 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5921 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5922 bool *grouped_store
, slp_tree slp_node
,
5923 slp_instance slp_node_instance
)
5925 bool is_store
= false;
5926 gimple vec_stmt
= NULL
;
5927 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5930 switch (STMT_VINFO_TYPE (stmt_info
))
5932 case type_demotion_vec_info_type
:
5933 case type_promotion_vec_info_type
:
5934 case type_conversion_vec_info_type
:
5935 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5939 case induc_vec_info_type
:
5940 gcc_assert (!slp_node
);
5941 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5945 case shift_vec_info_type
:
5946 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5950 case op_vec_info_type
:
5951 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5955 case assignment_vec_info_type
:
5956 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5960 case load_vec_info_type
:
5961 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5966 case store_vec_info_type
:
5967 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5969 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
5971 /* In case of interleaving, the whole chain is vectorized when the
5972 last store in the chain is reached. Store stmts before the last
5973 one are skipped, and there vec_stmt_info shouldn't be freed
5975 *grouped_store
= true;
5976 if (STMT_VINFO_VEC_STMT (stmt_info
))
5983 case condition_vec_info_type
:
5984 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5988 case call_vec_info_type
:
5989 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5990 stmt
= gsi_stmt (*gsi
);
5993 case reduc_vec_info_type
:
5994 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5999 if (!STMT_VINFO_LIVE_P (stmt_info
))
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6003 "stmt not supported.\n");
6008 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
6009 is being vectorized, but outside the immediately enclosing loop. */
6011 && STMT_VINFO_LOOP_VINFO (stmt_info
)
6012 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
6013 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
6014 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
6015 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
6016 || STMT_VINFO_RELEVANT (stmt_info
) ==
6017 vect_used_in_outer_by_reduction
))
6019 struct loop
*innerloop
= LOOP_VINFO_LOOP (
6020 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
6021 imm_use_iterator imm_iter
;
6022 use_operand_p use_p
;
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_NOTE
, vect_location
,
6028 "Record the vdef for outer-loop vectorization.\n");
6030 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
6031 (to be used when vectorizing outer-loop stmts that use the DEF of
6033 if (gimple_code (stmt
) == GIMPLE_PHI
)
6034 scalar_dest
= PHI_RESULT (stmt
);
6036 scalar_dest
= gimple_assign_lhs (stmt
);
6038 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
6040 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
6042 exit_phi
= USE_STMT (use_p
);
6043 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
6048 /* Handle stmts whose DEF is used outside the loop-nest that is
6049 being vectorized. */
6050 if (STMT_VINFO_LIVE_P (stmt_info
)
6051 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
6053 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
6058 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
6064 /* Remove a group of stores (for SLP or interleaving), free their
6068 vect_remove_stores (gimple first_stmt
)
6070 gimple next
= first_stmt
;
6072 gimple_stmt_iterator next_si
;
6076 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
6078 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
6079 if (is_pattern_stmt_p (stmt_info
))
6080 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
6081 /* Free the attached stmt_vec_info and remove the stmt. */
6082 next_si
= gsi_for_stmt (next
);
6083 unlink_stmt_vdef (next
);
6084 gsi_remove (&next_si
, true);
6085 release_defs (next
);
6086 free_stmt_vec_info (next
);
6092 /* Function new_stmt_vec_info.
6094 Create and initialize a new stmt_vec_info struct for STMT. */
6097 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
6098 bb_vec_info bb_vinfo
)
6101 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
6103 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
6104 STMT_VINFO_STMT (res
) = stmt
;
6105 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
6106 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
6107 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
6108 STMT_VINFO_LIVE_P (res
) = false;
6109 STMT_VINFO_VECTYPE (res
) = NULL
;
6110 STMT_VINFO_VEC_STMT (res
) = NULL
;
6111 STMT_VINFO_VECTORIZABLE (res
) = true;
6112 STMT_VINFO_IN_PATTERN_P (res
) = false;
6113 STMT_VINFO_RELATED_STMT (res
) = NULL
;
6114 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
6115 STMT_VINFO_DATA_REF (res
) = NULL
;
6117 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
6118 STMT_VINFO_DR_OFFSET (res
) = NULL
;
6119 STMT_VINFO_DR_INIT (res
) = NULL
;
6120 STMT_VINFO_DR_STEP (res
) = NULL
;
6121 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
6123 if (gimple_code (stmt
) == GIMPLE_PHI
6124 && is_loop_header_bb_p (gimple_bb (stmt
)))
6125 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
6127 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
6129 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
6130 STMT_SLP_TYPE (res
) = loop_vect
;
6131 GROUP_FIRST_ELEMENT (res
) = NULL
;
6132 GROUP_NEXT_ELEMENT (res
) = NULL
;
6133 GROUP_SIZE (res
) = 0;
6134 GROUP_STORE_COUNT (res
) = 0;
6135 GROUP_GAP (res
) = 0;
6136 GROUP_SAME_DR_STMT (res
) = NULL
;
6142 /* Create a hash table for stmt_vec_info. */
6145 init_stmt_vec_info_vec (void)
6147 gcc_assert (!stmt_vec_info_vec
.exists ());
6148 stmt_vec_info_vec
.create (50);
6152 /* Free hash table for stmt_vec_info. */
6155 free_stmt_vec_info_vec (void)
6159 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
6161 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
6162 gcc_assert (stmt_vec_info_vec
.exists ());
6163 stmt_vec_info_vec
.release ();
6167 /* Free stmt vectorization related info. */
6170 free_stmt_vec_info (gimple stmt
)
6172 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6177 /* Check if this statement has a related "pattern stmt"
6178 (introduced by the vectorizer during the pattern recognition
6179 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6181 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
6183 stmt_vec_info patt_info
6184 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6187 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
6190 gimple_stmt_iterator si
;
6191 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
6192 free_stmt_vec_info (gsi_stmt (si
));
6194 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
6198 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
6199 set_vinfo_for_stmt (stmt
, NULL
);
6204 /* Function get_vectype_for_scalar_type_and_size.
6206 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6210 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
6212 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
6213 enum machine_mode simd_mode
;
6214 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
6221 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
6222 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
6225 /* For vector types of elements whose mode precision doesn't
6226 match their types precision we use a element type of mode
6227 precision. The vectorization routines will have to make sure
6228 they support the proper result truncation/extension.
6229 We also make sure to build vector types with INTEGER_TYPE
6230 component type only. */
6231 if (INTEGRAL_TYPE_P (scalar_type
)
6232 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
6233 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
6234 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
6235 TYPE_UNSIGNED (scalar_type
));
6237 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6238 When the component mode passes the above test simply use a type
6239 corresponding to that mode. The theory is that any use that
6240 would cause problems with this will disable vectorization anyway. */
6241 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
6242 && !INTEGRAL_TYPE_P (scalar_type
))
6243 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
6245 /* We can't build a vector type of elements with alignment bigger than
6247 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
6248 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
6249 TYPE_UNSIGNED (scalar_type
));
6251 /* If we felt back to using the mode fail if there was
6252 no scalar type for it. */
6253 if (scalar_type
== NULL_TREE
)
6256 /* If no size was supplied use the mode the target prefers. Otherwise
6257 lookup a vector mode of the specified size. */
6259 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6261 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6262 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6266 vectype
= build_vector_type (scalar_type
, nunits
);
6268 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6269 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6275 unsigned int current_vector_size
;
6277 /* Function get_vectype_for_scalar_type.
6279 Returns the vector type corresponding to SCALAR_TYPE as supported
6283 get_vectype_for_scalar_type (tree scalar_type
)
6286 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6287 current_vector_size
);
6289 && current_vector_size
== 0)
6290 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
6294 /* Function get_same_sized_vectype
6296 Returns a vector type corresponding to SCALAR_TYPE of size
6297 VECTOR_TYPE if supported by the target. */
6300 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
6302 return get_vectype_for_scalar_type_and_size
6303 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
6306 /* Function vect_is_simple_use.
6309 LOOP_VINFO - the vect info of the loop that is being vectorized.
6310 BB_VINFO - the vect info of the basic block that is being vectorized.
6311 OPERAND - operand of STMT in the loop or bb.
6312 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6314 Returns whether a stmt with OPERAND can be vectorized.
6315 For loops, supportable operands are constants, loop invariants, and operands
6316 that are defined by the current iteration of the loop. Unsupportable
6317 operands are those that are defined by a previous iteration of the loop (as
6318 is the case in reduction/induction computations).
6319 For basic blocks, supportable operands are constants and bb invariants.
6320 For now, operands defined outside the basic block are not supported. */
6323 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6324 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6325 tree
*def
, enum vect_def_type
*dt
)
6328 stmt_vec_info stmt_vinfo
;
6329 struct loop
*loop
= NULL
;
6332 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6337 if (dump_enabled_p ())
6339 dump_printf_loc (MSG_NOTE
, vect_location
,
6340 "vect_is_simple_use: operand ");
6341 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
6342 dump_printf (MSG_NOTE
, "\n");
6345 if (CONSTANT_CLASS_P (operand
))
6347 *dt
= vect_constant_def
;
6351 if (is_gimple_min_invariant (operand
))
6354 *dt
= vect_external_def
;
6358 if (TREE_CODE (operand
) == PAREN_EXPR
)
6360 if (dump_enabled_p ())
6361 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
6362 operand
= TREE_OPERAND (operand
, 0);
6365 if (TREE_CODE (operand
) != SSA_NAME
)
6367 if (dump_enabled_p ())
6368 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6373 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6374 if (*def_stmt
== NULL
)
6376 if (dump_enabled_p ())
6377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6382 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
6385 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
6386 dump_printf (MSG_NOTE
, "\n");
6389 /* Empty stmt is expected only in case of a function argument.
6390 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6391 if (gimple_nop_p (*def_stmt
))
6394 *dt
= vect_external_def
;
6398 bb
= gimple_bb (*def_stmt
);
6400 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6401 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6402 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6403 *dt
= vect_external_def
;
6406 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6407 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6410 if (*dt
== vect_unknown_def_type
6412 && *dt
== vect_double_reduction_def
6413 && gimple_code (stmt
) != GIMPLE_PHI
))
6415 if (dump_enabled_p ())
6416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6417 "Unsupported pattern.\n");
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
6424 switch (gimple_code (*def_stmt
))
6427 *def
= gimple_phi_result (*def_stmt
);
6431 *def
= gimple_assign_lhs (*def_stmt
);
6435 *def
= gimple_call_lhs (*def_stmt
);
6440 if (dump_enabled_p ())
6441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6442 "unsupported defining stmt:\n");
6449 /* Function vect_is_simple_use_1.
6451 Same as vect_is_simple_use_1 but also determines the vector operand
6452 type of OPERAND and stores it to *VECTYPE. If the definition of
6453 OPERAND is vect_uninitialized_def, vect_constant_def or
6454 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6455 is responsible to compute the best suited vector type for the
6459 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6460 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6461 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6463 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6467 /* Now get a vector type if the def is internal, otherwise supply
6468 NULL_TREE and leave it up to the caller to figure out a proper
6469 type for the use stmt. */
6470 if (*dt
== vect_internal_def
6471 || *dt
== vect_induction_def
6472 || *dt
== vect_reduction_def
6473 || *dt
== vect_double_reduction_def
6474 || *dt
== vect_nested_cycle
)
6476 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
6478 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6479 && !STMT_VINFO_RELEVANT (stmt_info
)
6480 && !STMT_VINFO_LIVE_P (stmt_info
))
6481 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6483 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6484 gcc_assert (*vectype
!= NULL_TREE
);
6486 else if (*dt
== vect_uninitialized_def
6487 || *dt
== vect_constant_def
6488 || *dt
== vect_external_def
)
6489 *vectype
= NULL_TREE
;
6497 /* Function supportable_widening_operation
6499 Check whether an operation represented by the code CODE is a
6500 widening operation that is supported by the target platform in
6501 vector form (i.e., when operating on arguments of type VECTYPE_IN
6502 producing a result of type VECTYPE_OUT).
6504 Widening operations we currently support are NOP (CONVERT), FLOAT
6505 and WIDEN_MULT. This function checks if these operations are supported
6506 by the target platform either directly (via vector tree-codes), or via
6510 - CODE1 and CODE2 are codes of vector operations to be used when
6511 vectorizing the operation, if available.
6512 - MULTI_STEP_CVT determines the number of required intermediate steps in
6513 case of multi-step conversion (like char->short->int - in that case
6514 MULTI_STEP_CVT will be 1).
6515 - INTERM_TYPES contains the intermediate type required to perform the
6516 widening operation (short in the above example). */
6519 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6520 tree vectype_out
, tree vectype_in
,
6521 enum tree_code
*code1
, enum tree_code
*code2
,
6522 int *multi_step_cvt
,
6523 vec
<tree
> *interm_types
)
6525 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6526 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6527 struct loop
*vect_loop
= NULL
;
6528 enum machine_mode vec_mode
;
6529 enum insn_code icode1
, icode2
;
6530 optab optab1
, optab2
;
6531 tree vectype
= vectype_in
;
6532 tree wide_vectype
= vectype_out
;
6533 enum tree_code c1
, c2
;
6535 tree prev_type
, intermediate_type
;
6536 enum machine_mode intermediate_mode
, prev_mode
;
6537 optab optab3
, optab4
;
6539 *multi_step_cvt
= 0;
6541 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
6545 case WIDEN_MULT_EXPR
:
6546 /* The result of a vectorized widening operation usually requires
6547 two vectors (because the widened results do not fit into one vector).
6548 The generated vector results would normally be expected to be
6549 generated in the same order as in the original scalar computation,
6550 i.e. if 8 results are generated in each vector iteration, they are
6551 to be organized as follows:
6552 vect1: [res1,res2,res3,res4],
6553 vect2: [res5,res6,res7,res8].
6555 However, in the special case that the result of the widening
6556 operation is used in a reduction computation only, the order doesn't
6557 matter (because when vectorizing a reduction we change the order of
6558 the computation). Some targets can take advantage of this and
6559 generate more efficient code. For example, targets like Altivec,
6560 that support widen_mult using a sequence of {mult_even,mult_odd}
6561 generate the following vectors:
6562 vect1: [res1,res3,res5,res7],
6563 vect2: [res2,res4,res6,res8].
6565 When vectorizing outer-loops, we execute the inner-loop sequentially
6566 (each vectorized inner-loop iteration contributes to VF outer-loop
6567 iterations in parallel). We therefore don't allow to change the
6568 order of the computation in the inner-loop during outer-loop
6570 /* TODO: Another case in which order doesn't *really* matter is when we
6571 widen and then contract again, e.g. (short)((int)x * y >> 8).
6572 Normally, pack_trunc performs an even/odd permute, whereas the
6573 repack from an even/odd expansion would be an interleave, which
6574 would be significantly simpler for e.g. AVX2. */
6575 /* In any case, in order to avoid duplicating the code below, recurse
6576 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6577 are properly set up for the caller. If we fail, we'll continue with
6578 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6580 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6581 && !nested_in_vect_loop_p (vect_loop
, stmt
)
6582 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
6583 stmt
, vectype_out
, vectype_in
,
6584 code1
, code2
, multi_step_cvt
,
6587 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6588 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6591 case VEC_WIDEN_MULT_EVEN_EXPR
:
6592 /* Support the recursion induced just above. */
6593 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
6594 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
6597 case WIDEN_LSHIFT_EXPR
:
6598 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6599 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
6603 c1
= VEC_UNPACK_LO_EXPR
;
6604 c2
= VEC_UNPACK_HI_EXPR
;
6608 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6609 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6612 case FIX_TRUNC_EXPR
:
6613 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6614 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6615 computing the operation. */
6622 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
6624 enum tree_code ctmp
= c1
;
6629 if (code
== FIX_TRUNC_EXPR
)
6631 /* The signedness is determined from output operand. */
6632 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6633 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6637 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6638 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6641 if (!optab1
|| !optab2
)
6644 vec_mode
= TYPE_MODE (vectype
);
6645 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6646 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
6652 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6653 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6656 /* Check if it's a multi-step conversion that can be done using intermediate
6659 prev_type
= vectype
;
6660 prev_mode
= vec_mode
;
6662 if (!CONVERT_EXPR_CODE_P (code
))
6665 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6666 intermediate steps in promotion sequence. We try
6667 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6669 interm_types
->create (MAX_INTERM_CVT_STEPS
);
6670 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6672 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6674 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6675 TYPE_UNSIGNED (prev_type
));
6676 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6677 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
6679 if (!optab3
|| !optab4
6680 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6681 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6682 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6683 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6684 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6685 == CODE_FOR_nothing
)
6686 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6687 == CODE_FOR_nothing
))
6690 interm_types
->quick_push (intermediate_type
);
6691 (*multi_step_cvt
)++;
6693 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6694 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6697 prev_type
= intermediate_type
;
6698 prev_mode
= intermediate_mode
;
6701 interm_types
->release ();
6706 /* Function supportable_narrowing_operation
6708 Check whether an operation represented by the code CODE is a
6709 narrowing operation that is supported by the target platform in
6710 vector form (i.e., when operating on arguments of type VECTYPE_IN
6711 and producing a result of type VECTYPE_OUT).
6713 Narrowing operations we currently support are NOP (CONVERT) and
6714 FIX_TRUNC. This function checks if these operations are supported by
6715 the target platform directly via vector tree-codes.
6718 - CODE1 is the code of a vector operation to be used when
6719 vectorizing the operation, if available.
6720 - MULTI_STEP_CVT determines the number of required intermediate steps in
6721 case of multi-step conversion (like int->short->char - in that case
6722 MULTI_STEP_CVT will be 1).
6723 - INTERM_TYPES contains the intermediate type required to perform the
6724 narrowing operation (short in the above example). */
6727 supportable_narrowing_operation (enum tree_code code
,
6728 tree vectype_out
, tree vectype_in
,
6729 enum tree_code
*code1
, int *multi_step_cvt
,
6730 vec
<tree
> *interm_types
)
6732 enum machine_mode vec_mode
;
6733 enum insn_code icode1
;
6734 optab optab1
, interm_optab
;
6735 tree vectype
= vectype_in
;
6736 tree narrow_vectype
= vectype_out
;
6738 tree intermediate_type
;
6739 enum machine_mode intermediate_mode
, prev_mode
;
6743 *multi_step_cvt
= 0;
6747 c1
= VEC_PACK_TRUNC_EXPR
;
6750 case FIX_TRUNC_EXPR
:
6751 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6755 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6756 tree code and optabs used for computing the operation. */
6763 if (code
== FIX_TRUNC_EXPR
)
6764 /* The signedness is determined from output operand. */
6765 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6767 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6772 vec_mode
= TYPE_MODE (vectype
);
6773 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6778 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6781 /* Check if it's a multi-step conversion that can be done using intermediate
6783 prev_mode
= vec_mode
;
6784 if (code
== FIX_TRUNC_EXPR
)
6785 uns
= TYPE_UNSIGNED (vectype_out
);
6787 uns
= TYPE_UNSIGNED (vectype
);
6789 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6790 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6791 costly than signed. */
6792 if (code
== FIX_TRUNC_EXPR
&& uns
)
6794 enum insn_code icode2
;
6797 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6799 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6800 if (interm_optab
!= unknown_optab
6801 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6802 && insn_data
[icode1
].operand
[0].mode
6803 == insn_data
[icode2
].operand
[0].mode
)
6806 optab1
= interm_optab
;
6811 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6812 intermediate steps in promotion sequence. We try
6813 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6814 interm_types
->create (MAX_INTERM_CVT_STEPS
);
6815 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6817 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6819 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6821 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6824 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6825 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6826 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6827 == CODE_FOR_nothing
))
6830 interm_types
->quick_push (intermediate_type
);
6831 (*multi_step_cvt
)++;
6833 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6836 prev_mode
= intermediate_mode
;
6837 optab1
= interm_optab
;
6840 interm_types
->release ();