/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
38 #include "tree-phinodes.h"
39 #include "ssa-iterators.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-loop-manip.h"
44 #include "recog.h" /* FIXME: for insn_data */
46 #include "diagnostic-core.h"
47 #include "tree-vectorizer.h"
50 /* For lang_hooks.types.type_for_mode. */
51 #include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}

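/* As a rough illustration of the early-costing path above (the concrete
   numbers are hypothetical; real values come from the target's
   builtin_vectorization_cost hook): recording COUNT == 2 copies of an
   unaligned load that the target prices at 2 returns 2 * 2 = 4 as the
   preliminary estimate, while the (stmt, count, kind, misalign) tuple is
   saved in BODY_COST_VEC for the later, exact add_stmt_cost pass.  */
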
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

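/* Example of the pattern handling above (statement names are illustrative):
   if S1: x_1 = a_2 * b_3 was recognized as part of a recognized pattern and
   replaced by pattern-stmt S1', then marking S1 relevant actually marks S1'
   instead - unless x_1 still has non-pattern uses inside the loop, in which
   case S1 itself is marked.  */
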
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}

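/* For instance, in a hypothetical source loop

     for (i = 0; i < n; i++)
       sum += a[i];
     use (sum);

   the add that feeds SUM gets *live_p = true because its def is used after
   the loop (through the loop-closed exit phi), even when nothing inside the
   loop body makes it relevant by itself.  */
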
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- array_ref = var
     -2- var = array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

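/* Worked example of the loop above: a two-step promotion (PWR == 1) of a
   promotion stmt iterates i = 0, 1 with tmp = i + 1, accounting for
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote stmts,
   which reflects that each extra widening step doubles the instruction
   count.  */
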
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

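/* Worked example of the permute accounting above: storing a group of 4
   interleaved accesses with ncopies == 1 and no store-lanes support costs
   nstmts = 1 * exact_log2 (4) * 4 = 8 vec_perm stmts on top of the stores
   themselves.  */
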
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

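/* Worked example of the strided-load branch above: a stride load of a
   four-element vector type with ncopies == 2 is priced as 2 * 4 = 8
   scalar_load entries plus 2 vec_construct entries for gathering the
   elements back into vectors.  */
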
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

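/* Summary of the dr_explicit_realign_optimized accounting above: when
   ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS hold, the prologue gets
   2 vector_stmt entries for the address and initial load, plus 1 more
   when the target provides builtin_mask_for_load; each copy in the loop
   body then costs one vector_load and one vec_perm.  */
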
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

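/* For example (SSA names are illustrative), vectorizing the invariant
   scalar operand 5 for a four-element integer vector type emits in the
   preheader something like

     cst_.7 = { 5, 5, 5, 5 };

   and returns the lhs of that assignment as the vector def.  */
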
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt =  ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

1695 /* Function vectorizable_call.
1697 Check if STMT performs a function call that can be vectorized.
1698 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1699 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1700 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1703 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1709 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1710 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1711 tree vectype_out
, vectype_in
;
1714 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1715 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1716 tree fndecl
, new_temp
, def
, rhs_type
;
1718 enum vect_def_type dt
[3]
1719 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1720 gimple new_stmt
= NULL
;
1722 vec
<tree
> vargs
= vNULL
;
1723 enum { NARROW
, NONE
, WIDEN
} modifier
;
1727 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1730 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1733 /* Is STMT a vectorizable call? */
1734 if (!is_gimple_call (stmt
))
1737 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1740 if (stmt_can_throw_internal (stmt
))
1743 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1745 /* Process function arguments. */
1746 rhs_type
= NULL_TREE
;
1747 vectype_in
= NULL_TREE
;
1748 nargs
= gimple_call_num_args (stmt
);
1750 /* Bail out if the function has more than three arguments, we do not have
1751 interesting builtin functions to vectorize with more than two arguments
1752 except for fma. No arguments is also not good. */
1753 if (nargs
== 0 || nargs
> 3)
1756 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1757 if (gimple_call_internal_p (stmt
)
1758 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1761 rhs_type
= unsigned_type_node
;
1764 for (i
= 0; i
< nargs
; i
++)
1768 op
= gimple_call_arg (stmt
, i
);
1770 /* We can only handle calls with arguments of the same type. */
1772 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1774 if (dump_enabled_p ())
1775 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1776 "argument types differ.\n");
1780 rhs_type
= TREE_TYPE (op
);
1782 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1783 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1787 "use not simple.\n");
1792 vectype_in
= opvectype
;
1794 && opvectype
!= vectype_in
)
1796 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1798 "argument vector types differ.\n");
1802 /* If all arguments are external or constant defs use a vector type with
1803 the same size as the output vector type. */
1805 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1807 gcc_assert (vectype_in
);
1810 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1813 "no vectype for scalar type ");
1814 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
1815 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1822 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1823 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1824 if (nunits_in
== nunits_out
/ 2)
1826 else if (nunits_out
== nunits_in
)
1828 else if (nunits_out
== nunits_in
/ 2)
1833 /* For now, we only vectorize functions if a target specific builtin
1834 is available. TODO -- in some cases, it might be profitable to
1835 insert the calls for pieces of the vector, in order to be able
1836 to vectorize other operations in the loop. */
1837 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1838 if (fndecl
== NULL_TREE
)
1840 if (gimple_call_internal_p (stmt
)
1841 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
1844 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1845 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
1846 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1847 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
1849 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1850 { 0, 1, 2, ... vf - 1 } vector. */
1851 gcc_assert (nargs
== 0);
1855 if (dump_enabled_p ())
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1857 "function is not vectorizable.\n");
1862 gcc_assert (!gimple_vuse (stmt
));
1864 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1866 else if (modifier
== NARROW
)
1867 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1869 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1871 /* Sanity check: make sure that at least one copy of the vectorized stmt
1872 needs to be generated. */
1873 gcc_assert (ncopies
>= 1);
1875 if (!vec_stmt
) /* transformation not required. */
1877 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
1881 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
1891 scalar_dest
= gimple_call_lhs (stmt
);
1892 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1894 prev_stmt_info
= NULL
;
1898 for (j
= 0; j
< ncopies
; ++j
)
1900 /* Build argument list for the vectorized call. */
1902 vargs
.create (nargs
);
1908 vec
<vec
<tree
> > vec_defs
;
1909 vec_defs
.create (nargs
);
1910 vec
<tree
> vec_oprnds0
;
1912 for (i
= 0; i
< nargs
; i
++)
1913 vargs
.quick_push (gimple_call_arg (stmt
, i
));
1914 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1915 vec_oprnds0
= vec_defs
[0];
1917 /* Arguments are ready. Create the new vector stmt. */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      vec_defs.release ();
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	    }

	  if (gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
	    {
	      tree *v = XALLOCAVEC (tree, nunits_out);
	      int k;
	      for (k = 0; k < nunits_out; ++k)
		v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
	      tree cst = build_vector (vectype_out, v);
	      tree new_var
		= vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
	      gimple init_stmt = gimple_build_assign (new_var, cst);
	      new_temp = make_ssa_name (new_var, init_stmt);
	      gimple_assign_set_lhs (init_stmt, new_temp);
	      vect_init_vector_1 (stmt, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest, NULL);
	      new_stmt = gimple_build_assign (new_temp,
					      gimple_assign_lhs (init_stmt));
	    }
	  else
	    {
	      new_stmt = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, new_temp);
	    }
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      vec<vec<tree> > vec_defs;
	      vec_defs.create (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      vec_defs.release ();
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
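
/* For illustration only (the builtin name and vector width below are
   assumptions, not taken from any particular target): a sketch of what
   the transformation above produces for a scalar loop such as

       for (i = 0; i < n; i++)
	 a[i] = sqrtf (b[i]);

   is, per vector copy,

       vect_b = <load of 4 floats>;
       vect_a = __builtin_some_target_vsqrtf (vect_b);   <- hypothetical
       <store of vect_a>;

   while the original scalar call is rewritten above into "lhs = 0" so
   that a later dce pass can remove it.  */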
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
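
/* For illustration only (the vector types are assumptions about the
   target): with CODE == VEC_UNPACK_LO_EXPR, a V4SI VEC_DEST and a V8HI
   VEC_OPRND0, the generic-support branch above emits one half as

       vect_lo = VEC_UNPACK_LO_EXPR <vect_h>;

   whereas the CALL_EXPR branch would instead emit a call to the target
   builtin DECL with the same operands.  */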
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
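
/* For illustration only (the vector types are assumptions about the
   target): demoting int to char with V4SI inputs is a two-step
   conversion.  The first level packs pairs of V4SI into V8HI, and the
   recursive call packs pairs of V8HI into V16QI with
   VEC_PACK_TRUNC_EXPR, e.g.

       vect_h0 = VEC_PACK_TRUNC_EXPR <vect_i0, vect_i1>;
       vect_h1 = VEC_PACK_TRUNC_EXPR <vect_i2, vect_i3>;
       vect_q  = VEC_PACK_TRUNC_EXPR <vect_h0, vect_h1>;

   which matches the halving of VEC_OPRNDS at each recursion level.  */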
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
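
/* For illustration only (the vector types are assumptions about the
   target): promoting a V8HI operand to int produces two V4SI halves
   per input vector,

       vect_lo = VEC_UNPACK_LO_EXPR <vect_h>;
       vect_hi = VEC_UNPACK_HI_EXPR <vect_h>;

   so VEC_TMP ends up twice as long as *VEC_OPRNDS0, ready for the next
   (possibly multi-step) conversion level.  */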
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
							   vop0, NULL);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
					 slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest, NULL);
		      new_stmt = gimple_build_assign_with_ops (codecvt1,
							       new_temp,
							       vop0, NULL);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest, NULL);
		    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
							     vop0, NULL);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
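
/* For illustration only (the scalar and vector types are assumptions,
   not requirements of any particular target):

       int a[N]; float f[N]; double d[N];

       a[i] = (int) f[i];       FIX_TRUNC_EXPR, nunits equal  -> NONE
       d[i] = (double) f[i];    nunits_in > nunits_out        -> WIDEN
       f[i] = (float) d[i];     nunits_in < nunits_out        -> NARROW

   The modifier above is chosen purely from nunits_in vs. nunits_out,
   and the WIDEN/NARROW paths may additionally route the conversion
   through CVT_TYPE when no single-step operation is supported.  */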
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
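
/* For illustration only: a plain SSA copy, or a conversion that keeps the
   element count and vector size, e.g.

       unsigned int u[N]; int s[N];
       s[i] = (int) u[i];

   is vectorized by the code above as a single VIEW_CONVERT_EXPR per
   vector copy,

       vect_s = VIEW_CONVERT_EXPR<vector(4) int> (vect_u);

   where the "vector(4)" width is only an assumption for the example.  */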
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{

  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
		    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			     &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a vector shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
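
/* For illustration only: for the scalar statements

       a[i] = b[i] << 3;        shift amount invariant -> scalar_shift_arg
       a[i] = b[i] << c[i];     shift amount varies    -> !scalar_shift_arg

   the first form can keep the constant 3 in scalar form when the insn's
   operand 2 mode is not a vector mode, while the second form requires
   the vector/vector shift optab; when only the vector/vector form is
   available, an invariant amount is broadcast with vect_init_vector as
   done above.  */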
static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
	icode = LAST_INSN_CODE;
      else
	icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  if (op_type == ternary_op)
	    {
	      vec_oprnds2.create (1);
	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
								    stmt,
								    NULL));
	    }
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
						   vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
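
/* For illustration only (the 4-element vectors are an assumption): with
   VF == 4 and a single copy, the scalar statement

       z_1 = x_2 + y_3;

   simply becomes

       vect_z = vect_x + vect_y;

   while ternary codes (e.g. FMA_EXPR) take the extra operand from
   VEC_OPRNDS2 exactly as handled above.  */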
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
3838 /* Function vectorizable_store.
3840 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3842 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3843 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3844 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3847 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3853 tree vec_oprnd
= NULL_TREE
;
3854 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3855 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3856 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3858 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3859 struct loop
*loop
= NULL
;
3860 enum machine_mode vec_mode
;
3862 enum dr_alignment_support alignment_support_scheme
;
3865 enum vect_def_type dt
;
3866 stmt_vec_info prev_stmt_info
= NULL
;
3867 tree dataref_ptr
= NULL_TREE
;
3868 tree dataref_offset
= NULL_TREE
;
3869 gimple ptr_incr
= NULL
;
3870 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3873 gimple next_stmt
, first_stmt
= NULL
;
3874 bool grouped_store
= false;
3875 bool store_lanes_p
= false;
3876 unsigned int group_size
, i
;
3877 vec
<tree
> dr_chain
= vNULL
;
3878 vec
<tree
> oprnds
= vNULL
;
3879 vec
<tree
> result_chain
= vNULL
;
3881 vec
<tree
> vec_oprnds
= vNULL
;
3882 bool slp
= (slp_node
!= NULL
);
3883 unsigned int vec_num
;
3884 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3888 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3890 /* Multiple types in SLP are handled by creating the appropriate number of
3891 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3893 if (slp
|| PURE_SLP_STMT (stmt_info
))
3896 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3898 gcc_assert (ncopies
>= 1);
3900 /* FORNOW. This restriction should be relaxed. */
3901 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3905 "multiple types in nested loop.\n");
3909 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3912 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3915 /* Is vectorizable store? */
3917 if (!is_gimple_assign (stmt
))
3920 scalar_dest
= gimple_assign_lhs (stmt
);
3921 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3922 && is_pattern_stmt_p (stmt_info
))
3923 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3924 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3925 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
3926 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3927 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3928 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3929 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3930 && TREE_CODE (scalar_dest
) != MEM_REF
)
3933 gcc_assert (gimple_assign_single_p (stmt
));
3934 op
= gimple_assign_rhs1 (stmt
);
3935 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3938 if (dump_enabled_p ())
3939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3940 "use not simple.\n");
3944 elem_type
= TREE_TYPE (vectype
);
3945 vec_mode
= TYPE_MODE (vectype
);
3947 /* FORNOW. In some cases can vectorize even if data-type not supported
3948 (e.g. - array initialization with 0). */
3949 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3952 if (!STMT_VINFO_DATA_REF (stmt_info
))
3955 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3956 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3957 size_zero_node
) < 0)
3959 if (dump_enabled_p ())
3960 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3961 "negative step for store.\n");
3965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3967 grouped_store
= true;
3968 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3969 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3971 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3972 if (vect_store_lanes_supported (vectype
, group_size
))
3973 store_lanes_p
= true;
3974 else if (!vect_grouped_store_supported (vectype
, group_size
))
3978 if (first_stmt
== stmt
)
3980 /* STMT is the leader of the group. Check the operands of all the
3981 stmts of the group. */
3982 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3985 gcc_assert (gimple_assign_single_p (next_stmt
));
3986 op
= gimple_assign_rhs1 (next_stmt
);
3987 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3988 &def_stmt
, &def
, &dt
))
3990 if (dump_enabled_p ())
3991 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3992 "use not simple.\n");
3995 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4000 if (!vec_stmt
) /* transformation not required. */
4002 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
4003 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
4010 ensure_base_align (stmt_info
, dr
);
4014 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4015 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4017 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
4020 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
4022 /* We vectorize all the stmts of the interleaving group when we
4023 reach the last stmt in the group. */
4024 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
4025 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
4034 grouped_store
= false;
4035 /* VEC_NUM is the number of vect stmts to be created for this
4037 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4038 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4039 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4040 op
= gimple_assign_rhs1 (first_stmt
);
4043 /* VEC_NUM is the number of vect stmts to be created for this
4045 vec_num
= group_size
;
4051 group_size
= vec_num
= 1;
4054 if (dump_enabled_p ())
4055 dump_printf_loc (MSG_NOTE
, vect_location
,
4056 "transform store. ncopies = %d\n", ncopies
);
4058 dr_chain
.create (group_size
);
4059 oprnds
.create (group_size
);
4061 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4062 gcc_assert (alignment_support_scheme
);
4063 /* Targets with store-lane instructions must not require explicit
4065 gcc_assert (!store_lanes_p
4066 || alignment_support_scheme
== dr_aligned
4067 || alignment_support_scheme
== dr_unaligned_supported
);
4070 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4072 aggr_type
= vectype
;
4074 /* In case the vectorization factor (VF) is bigger than the number
4075 of elements that we can fit in a vectype (nunits), we have to generate
4076 more than one vector stmt - i.e - we need to "unroll" the
4077 vector stmt by a factor VF/nunits. For more details see documentation in
4078 vect_get_vec_def_for_copy_stmt. */
4080 /* In case of interleaving (non-unit grouped access):
4087 We create vectorized stores starting from base address (the access of the
4088 first stmt in the chain (S2 in the above example), when the last store stmt
4089 of the chain (S4) is reached:
4092 VS2: &base + vec_size*1 = vx0
4093 VS3: &base + vec_size*2 = vx1
4094 VS4: &base + vec_size*3 = vx3
4096 Then permutation statements are generated:
4098 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4099 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4102 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4103 (the order of the data-refs in the output of vect_permute_store_chain
4104 corresponds to the order of scalar stmts in the interleaving chain - see
4105 the documentation of vect_permute_store_chain()).
4107 In case of both multiple types and interleaving, above vector stores and
4108 permutation stmts are created for every copy. The result vector stmts are
4109 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4110 STMT_VINFO_RELATED_STMT for the next copies.
4113 prev_stmt_info
= NULL
;
4114 for (j
= 0; j
< ncopies
; j
++)
4122 /* Get vectorized arguments for SLP_NODE. */
4123 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4124 NULL
, slp_node
, -1);
4126 vec_oprnd
= vec_oprnds
[0];
4130 /* For interleaved stores we collect vectorized defs for all the
4131 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4132 used as an input to vect_permute_store_chain(), and OPRNDS as
4133 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4135 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4136 OPRNDS are of size 1. */
4137 next_stmt
= first_stmt
;
4138 for (i
= 0; i
< group_size
; i
++)
4140 /* Since gaps are not supported for interleaved stores,
4141 GROUP_SIZE is the exact number of stmts in the chain.
4142 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4143 there is no interleaving, GROUP_SIZE is 1, and only one
4144 iteration of the loop will be executed. */
4145 gcc_assert (next_stmt
4146 && gimple_assign_single_p (next_stmt
));
4147 op
= gimple_assign_rhs1 (next_stmt
);
4149 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4151 dr_chain
.quick_push (vec_oprnd
);
4152 oprnds
.quick_push (vec_oprnd
);
4153 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4157 /* We should have catched mismatched types earlier. */
4158 gcc_assert (useless_type_conversion_p (vectype
,
4159 TREE_TYPE (vec_oprnd
)));
4160 bool simd_lane_access_p
4161 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
4162 if (simd_lane_access_p
4163 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
4164 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
4165 && integer_zerop (DR_OFFSET (first_dr
))
4166 && integer_zerop (DR_INIT (first_dr
))
4167 && alias_sets_conflict_p (get_alias_set (aggr_type
),
4168 get_alias_set (DR_REF (first_dr
))))
4170 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
4171 dataref_offset
= build_int_cst (reference_alias_ptr_type
4172 (DR_REF (first_dr
)), 0);
4177 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
4178 simd_lane_access_p
? loop
: NULL
,
4179 NULL_TREE
, &dummy
, gsi
, &ptr_incr
,
4180 simd_lane_access_p
, &inv_p
);
4181 gcc_assert (bb_vinfo
|| !inv_p
);
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
	     the next copy.

	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
4192 for (i
= 0; i
< group_size
; i
++)
4195 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4197 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4198 dr_chain
[i
] = vec_oprnd
;
4199 oprnds
[i
] = vec_oprnd
;
4203 = int_const_binop (PLUS_EXPR
, dataref_offset
,
4204 TYPE_SIZE_UNIT (aggr_type
));
4206 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4207 TYPE_SIZE_UNIT (aggr_type
));
4214 /* Combine all the vectors into an array. */
4215 vec_array
= create_vector_array (vectype
, vec_num
);
4216 for (i
= 0; i
< vec_num
; i
++)
4218 vec_oprnd
= dr_chain
[i
];
4219 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4223 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4224 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4225 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4226 gimple_call_set_lhs (new_stmt
, data_ref
);
4227 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4235 result_chain
.create (group_size
);
4237 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4241 next_stmt
= first_stmt
;
4242 for (i
= 0; i
< vec_num
; i
++)
4244 unsigned align
, misalign
;
4247 /* Bump the vector pointer. */
4248 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4252 vec_oprnd
= vec_oprnds
[i
];
4253 else if (grouped_store
)
4254 /* For grouped stores vectorized defs are interleaved in
4255 vect_permute_store_chain(). */
4256 vec_oprnd
= result_chain
[i
];
4258 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4261 : build_int_cst (reference_alias_ptr_type
4262 (DR_REF (first_dr
)), 0));
4263 align
= TYPE_ALIGN_UNIT (vectype
);
4264 if (aligned_access_p (first_dr
))
4266 else if (DR_MISALIGNMENT (first_dr
) == -1)
4268 TREE_TYPE (data_ref
)
4269 = build_aligned_type (TREE_TYPE (data_ref
),
4270 TYPE_ALIGN (elem_type
));
4271 align
= TYPE_ALIGN_UNIT (elem_type
);
4276 TREE_TYPE (data_ref
)
4277 = build_aligned_type (TREE_TYPE (data_ref
),
4278 TYPE_ALIGN (elem_type
));
4279 misalign
= DR_MISALIGNMENT (first_dr
);
4281 if (dataref_offset
== NULL_TREE
)
4282 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4285 /* Arguments are ready. Create the new vector stmt. */
4286 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4287 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4292 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4300 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4302 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4303 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4307 dr_chain
.release ();
4309 result_chain
.release ();
4310 vec_oprnds
.release ();
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */
tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;

  mask_elt_type = lang_hooks.types.type_for_mode
		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
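/* Usage example (illustrative only): to build a mask that picks the
   even-indexed elements out of the concatenation of two NUNITS-element
   vectors, fill SEL with 0, 2, 4, ... and call vect_gen_perm_mask:

     for (i = 0; i < nunits; i++)
       sel[i] = 2 * i;
     mask = vect_gen_perm_mask (vectype, sel);

   A NULL_TREE result means can_vec_perm_p rejected the permutation for
   the target.  */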
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
/* Given vector variables X and Y, which were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
					    x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
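/* For example (illustrative only): permute_vec_elements (x, y, mask, stmt, gsi)
   inserts a single statement of the form

     tmp_N = VEC_PERM_EXPR <x, y, mask>;

   at *GSI and returns tmp_N, a fresh SSA name whose underlying variable is
   derived from the scalar destination of STMT.  */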
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4394 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4395 slp_tree slp_node
, slp_instance slp_node_instance
)
4398 tree vec_dest
= NULL
;
4399 tree data_ref
= NULL
;
4400 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4401 stmt_vec_info prev_stmt_info
;
4402 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4403 struct loop
*loop
= NULL
;
4404 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4405 bool nested_in_vect_loop
= false;
4406 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4407 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4410 enum machine_mode mode
;
4411 gimple new_stmt
= NULL
;
4413 enum dr_alignment_support alignment_support_scheme
;
4414 tree dataref_ptr
= NULL_TREE
;
4415 tree dataref_offset
= NULL_TREE
;
4416 gimple ptr_incr
= NULL
;
4417 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4419 int i
, j
, group_size
, group_gap
;
4420 tree msq
= NULL_TREE
, lsq
;
4421 tree offset
= NULL_TREE
;
4422 tree realignment_token
= NULL_TREE
;
4424 vec
<tree
> dr_chain
= vNULL
;
4425 bool grouped_load
= false;
4426 bool load_lanes_p
= false;
4429 bool negative
= false;
4430 bool compute_in_loop
= false;
4431 struct loop
*at_loop
;
4433 bool slp
= (slp_node
!= NULL
);
4434 bool slp_perm
= false;
4435 enum tree_code code
;
4436 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4439 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4440 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4441 int gather_scale
= 1;
4442 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4446 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4447 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4448 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4453 /* Multiple types in SLP are handled by creating the appropriate number of
4454 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4456 if (slp
|| PURE_SLP_STMT (stmt_info
))
4459 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4461 gcc_assert (ncopies
>= 1);
4463 /* FORNOW. This restriction should be relaxed. */
4464 if (nested_in_vect_loop
&& ncopies
> 1)
4466 if (dump_enabled_p ())
4467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4468 "multiple types in nested loop.\n");
4472 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4475 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4478 /* Is vectorizable load? */
4479 if (!is_gimple_assign (stmt
))
4482 scalar_dest
= gimple_assign_lhs (stmt
);
4483 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4486 code
= gimple_assign_rhs_code (stmt
);
4487 if (code
!= ARRAY_REF
4488 && code
!= BIT_FIELD_REF
4489 && code
!= INDIRECT_REF
4490 && code
!= COMPONENT_REF
4491 && code
!= IMAGPART_EXPR
4492 && code
!= REALPART_EXPR
4494 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4497 if (!STMT_VINFO_DATA_REF (stmt_info
))
4500 elem_type
= TREE_TYPE (vectype
);
4501 mode
= TYPE_MODE (vectype
);
4503 /* FORNOW. In some cases can vectorize even if data-type not supported
4504 (e.g. - data copies). */
4505 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4507 if (dump_enabled_p ())
4508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4509 "Aligned load, but unsupported type.\n");
4513 /* Check if the load is a part of an interleaving chain. */
4514 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4516 grouped_load
= true;
4518 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4520 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4521 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4523 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4524 if (vect_load_lanes_supported (vectype
, group_size
))
4525 load_lanes_p
= true;
4526 else if (!vect_grouped_load_supported (vectype
, group_size
))
4532 if (STMT_VINFO_GATHER_P (stmt_info
))
4536 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4537 &gather_off
, &gather_scale
);
4538 gcc_assert (gather_decl
);
4539 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4540 &def_stmt
, &def
, &gather_dt
,
4541 &gather_off_vectype
))
4543 if (dump_enabled_p ())
4544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4545 "gather index use not simple.\n");
4549 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4553 negative
= tree_int_cst_compare (nested_in_vect_loop
4554 ? STMT_VINFO_DR_STEP (stmt_info
)
4556 size_zero_node
) < 0;
4557 if (negative
&& ncopies
> 1)
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4561 "multiple types with negative step.\n");
4569 if (dump_enabled_p ())
4570 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4571 "negative step for group load not supported"
4575 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4576 if (alignment_support_scheme
!= dr_aligned
4577 && alignment_support_scheme
!= dr_unaligned_supported
)
4579 if (dump_enabled_p ())
4580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4581 "negative step but alignment required.\n");
4584 if (!perm_mask_for_reverse (vectype
))
4586 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4588 "negative step and reversing not supported."
4595 if (!vec_stmt
) /* transformation not required. */
4597 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4598 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4602 if (dump_enabled_p ())
4603 dump_printf_loc (MSG_NOTE
, vect_location
,
4604 "transform load. ncopies = %d\n", ncopies
);
4608 ensure_base_align (stmt_info
, dr
);
4610 if (STMT_VINFO_GATHER_P (stmt_info
))
4612 tree vec_oprnd0
= NULL_TREE
, op
;
4613 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4614 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4615 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4616 edge pe
= loop_preheader_edge (loop
);
4619 enum { NARROW
, NONE
, WIDEN
} modifier
;
4620 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4622 if (nunits
== gather_off_nunits
)
4624 else if (nunits
== gather_off_nunits
/ 2)
4626 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4629 for (i
= 0; i
< gather_off_nunits
; ++i
)
4630 sel
[i
] = i
| nunits
;
4632 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4633 gcc_assert (perm_mask
!= NULL_TREE
);
4635 else if (nunits
== gather_off_nunits
* 2)
4637 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4640 for (i
= 0; i
< nunits
; ++i
)
4641 sel
[i
] = i
< gather_off_nunits
4642 ? i
: i
+ nunits
- gather_off_nunits
;
4644 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4645 gcc_assert (perm_mask
!= NULL_TREE
);
4651 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4652 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4653 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4654 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4655 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4656 scaletype
= TREE_VALUE (arglist
);
4657 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4658 && types_compatible_p (srctype
, masktype
));
4660 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4662 ptr
= fold_convert (ptrtype
, gather_base
);
4663 if (!is_gimple_min_invariant (ptr
))
4665 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4666 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4667 gcc_assert (!new_bb
);
4670 /* Currently we support only unconditional gather loads,
4671 so mask should be all ones. */
4672 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4673 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4674 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4678 for (j
= 0; j
< 6; ++j
)
4680 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4681 mask
= build_real (TREE_TYPE (masktype
), r
);
4685 mask
= build_vector_from_val (masktype
, mask
);
4686 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4688 scale
= build_int_cst (scaletype
, gather_scale
);
4690 prev_stmt_info
= NULL
;
4691 for (j
= 0; j
< ncopies
; ++j
)
4693 if (modifier
== WIDEN
&& (j
& 1))
4694 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4695 perm_mask
, stmt
, gsi
);
4698 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4701 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4703 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4705 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4706 == TYPE_VECTOR_SUBPARTS (idxtype
));
4707 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4708 var
= make_ssa_name (var
, NULL
);
4709 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4711 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4713 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4718 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4720 if (!useless_type_conversion_p (vectype
, rettype
))
4722 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4723 == TYPE_VECTOR_SUBPARTS (rettype
));
4724 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4725 op
= make_ssa_name (var
, new_stmt
);
4726 gimple_call_set_lhs (new_stmt
, op
);
4727 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4728 var
= make_ssa_name (vec_dest
, NULL
);
4729 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4731 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4736 var
= make_ssa_name (vec_dest
, new_stmt
);
4737 gimple_call_set_lhs (new_stmt
, var
);
4740 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4742 if (modifier
== NARROW
)
4749 var
= permute_vec_elements (prev_res
, var
,
4750 perm_mask
, stmt
, gsi
);
4751 new_stmt
= SSA_NAME_DEF_STMT (var
);
4754 if (prev_stmt_info
== NULL
)
4755 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4757 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4758 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4762 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4764 gimple_stmt_iterator incr_gsi
;
4770 vec
<constructor_elt
, va_gc
> *v
= NULL
;
4771 gimple_seq stmts
= NULL
;
4772 tree stride_base
, stride_step
, alias_off
;
4774 gcc_assert (!nested_in_vect_loop
);
4777 = fold_build_pointer_plus
4778 (unshare_expr (DR_BASE_ADDRESS (dr
)),
4779 size_binop (PLUS_EXPR
,
4780 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
4781 convert_to_ptrofftype (DR_INIT (dr
))));
4782 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}  */
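      /* As a concrete (illustrative) example: with four ints per vector
	 and stride == 3, each vector iteration would load

	     tmp0 = array[j];
	     tmp1 = array[j + 3];
	     tmp2 = array[j + 6];
	     tmp3 = array[j + 9];
	     vectemp = {tmp0, tmp1, tmp2, tmp3}

	 and the new induction variable advances by VF * stride == 12
	 scalar elements per vector iteration.  */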
4800 ivstep
= stride_step
;
4801 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4802 build_int_cst (TREE_TYPE (ivstep
), vf
));
4804 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4806 create_iv (stride_base
, ivstep
, NULL
,
4807 loop
, &incr_gsi
, insert_after
,
4809 incr
= gsi_stmt (incr_gsi
);
4810 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4812 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4814 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4816 prev_stmt_info
= NULL
;
4817 running_off
= offvar
;
4818 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
4819 for (j
= 0; j
< ncopies
; j
++)
4823 vec_alloc (v
, nunits
);
4824 for (i
= 0; i
< nunits
; i
++)
4826 tree newref
, newoff
;
4828 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
4829 running_off
, alias_off
);
4831 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4834 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4835 newoff
= copy_ssa_name (running_off
, NULL
);
4836 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4837 running_off
, stride_step
);
4838 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4840 running_off
= newoff
;
4843 vec_inv
= build_constructor (vectype
, v
);
4844 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4845 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4848 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4850 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4851 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4858 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4860 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
4861 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
4862 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4864 /* Check if the chain of loads is already vectorized. */
4865 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
4866 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4867 ??? But we can only do so if there is exactly one
4868 as we have no way to get at the rest. Leave the CSE
4870 ??? With the group load eventually participating
4871 in multiple different permutations (having multiple
4872 slp nodes which refer to the same group) the CSE
4873 is even wrong code. See PR56270. */
4876 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4879 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4880 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4882 /* VEC_NUM is the number of vect stmts to be created for this group. */
4885 grouped_load
= false;
4886 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4887 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
4889 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
4893 vec_num
= group_size
;
4901 group_size
= vec_num
= 1;
4905 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4906 gcc_assert (alignment_support_scheme
);
4907 /* Targets with load-lane instructions must not require explicit
4909 gcc_assert (!load_lanes_p
4910 || alignment_support_scheme
== dr_aligned
4911 || alignment_support_scheme
== dr_unaligned_supported
);
4913 /* In case the vectorization factor (VF) is bigger than the number
4914 of elements that we can fit in a vectype (nunits), we have to generate
4915 more than one vector stmt - i.e - we need to "unroll" the
4916 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4917 from one copy of the vector stmt to the next, in the field
4918 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4919 stages to find the correct vector defs to be used when vectorizing
4920 stmts that use the defs of the current stmt. The example below
4921 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4922 need to create 4 vectorized stmts):
4924 before vectorization:
4925 RELATED_STMT VEC_STMT
4929 step 1: vectorize stmt S1:
4930 We first create the vector stmt VS1_0, and, as usual, record a
4931 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4932 Next, we create the vector stmt VS1_1, and record a pointer to
4933 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4934 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4936 RELATED_STMT VEC_STMT
4937 VS1_0: vx0 = memref0 VS1_1 -
4938 VS1_1: vx1 = memref1 VS1_2 -
4939 VS1_2: vx2 = memref2 VS1_3 -
4940 VS1_3: vx3 = memref3 - -
4941 S1: x = load - VS1_0
4944 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4945 information we recorded in RELATED_STMT field is used to vectorize
  /* In case of interleaving (non-unit grouped access):

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
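  /* For instance (illustrative only): with nunits == 4 and a group of two
     loads, the selectors above become { 0, 2, 4, 6 } and { 1, 3, 5, 7 },
     i.e. the classic extract-even / extract-odd pair that de-interleaves
     the elements of vx0 and vx1.  */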
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = p + indx * vectype_size;

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       p2 = p2 + indx * vectype_size
       vec_dest = realign_load (msq, lsq, realignment_token)  */
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
5016 if (nested_in_vect_loop
5017 && (TREE_INT_CST_LOW (DR_STEP (dr
))
5018 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
5020 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
5021 compute_in_loop
= true;
5024 if ((alignment_support_scheme
== dr_explicit_realign_optimized
5025 || alignment_support_scheme
== dr_explicit_realign
)
5026 && !compute_in_loop
)
5028 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
5029 alignment_support_scheme
, NULL_TREE
,
5031 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5033 phi
= SSA_NAME_DEF_STMT (msq
);
5034 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5041 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5044 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5046 aggr_type
= vectype
;
5048 prev_stmt_info
= NULL
;
5049 for (j
= 0; j
< ncopies
; j
++)
5051 /* 1. Create the vector or array pointer update chain. */
5054 bool simd_lane_access_p
5055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5056 if (simd_lane_access_p
5057 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5058 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5059 && integer_zerop (DR_OFFSET (first_dr
))
5060 && integer_zerop (DR_INIT (first_dr
))
5061 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5062 get_alias_set (DR_REF (first_dr
)))
5063 && (alignment_support_scheme
== dr_aligned
5064 || alignment_support_scheme
== dr_unaligned_supported
))
5066 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5067 dataref_offset
= build_int_cst (reference_alias_ptr_type
5068 (DR_REF (first_dr
)), 0);
5073 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
5074 offset
, &dummy
, gsi
, &ptr_incr
,
5075 simd_lane_access_p
, &inv_p
);
5077 else if (dataref_offset
)
5078 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
5079 TYPE_SIZE_UNIT (aggr_type
));
5081 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5082 TYPE_SIZE_UNIT (aggr_type
));
5084 if (grouped_load
|| slp_perm
)
5085 dr_chain
.create (vec_num
);
5091 vec_array
= create_vector_array (vectype
, vec_num
);
5094 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5095 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5096 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
5097 gimple_call_set_lhs (new_stmt
, vec_array
);
5098 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5100 /* Extract each vector into an SSA_NAME. */
5101 for (i
= 0; i
< vec_num
; i
++)
5103 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
5105 dr_chain
.quick_push (new_temp
);
5108 /* Record the mapping between SSA_NAMEs and statements. */
5109 vect_record_grouped_load_vectors (stmt
, dr_chain
);
5113 for (i
= 0; i
< vec_num
; i
++)
5116 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5119 /* 2. Create the vector-load in the loop. */
5120 switch (alignment_support_scheme
)
5123 case dr_unaligned_supported
:
5125 unsigned int align
, misalign
;
5128 = build2 (MEM_REF
, vectype
, dataref_ptr
,
5131 : build_int_cst (reference_alias_ptr_type
5132 (DR_REF (first_dr
)), 0));
5133 align
= TYPE_ALIGN_UNIT (vectype
);
5134 if (alignment_support_scheme
== dr_aligned
)
5136 gcc_assert (aligned_access_p (first_dr
));
5139 else if (DR_MISALIGNMENT (first_dr
) == -1)
5141 TREE_TYPE (data_ref
)
5142 = build_aligned_type (TREE_TYPE (data_ref
),
5143 TYPE_ALIGN (elem_type
));
5144 align
= TYPE_ALIGN_UNIT (elem_type
);
5149 TREE_TYPE (data_ref
)
5150 = build_aligned_type (TREE_TYPE (data_ref
),
5151 TYPE_ALIGN (elem_type
));
5152 misalign
= DR_MISALIGNMENT (first_dr
);
5154 if (dataref_offset
== NULL_TREE
)
5155 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
5159 case dr_explicit_realign
:
5164 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5166 if (compute_in_loop
)
5167 msq
= vect_setup_realignment (first_stmt
, gsi
,
5169 dr_explicit_realign
,
5172 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
5173 new_stmt
= gimple_build_assign_with_ops
5174 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
5176 (TREE_TYPE (dataref_ptr
),
5177 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5178 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5180 = build2 (MEM_REF
, vectype
, ptr
,
5181 build_int_cst (reference_alias_ptr_type
5182 (DR_REF (first_dr
)), 0));
5183 vec_dest
= vect_create_destination_var (scalar_dest
,
5185 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5186 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5187 gimple_assign_set_lhs (new_stmt
, new_temp
);
5188 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
5189 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
5190 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5193 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5194 TYPE_SIZE_UNIT (elem_type
));
5195 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5196 new_stmt
= gimple_build_assign_with_ops
5197 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5200 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5201 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
5202 gimple_assign_set_lhs (new_stmt
, ptr
);
5203 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5205 = build2 (MEM_REF
, vectype
, ptr
,
5206 build_int_cst (reference_alias_ptr_type
5207 (DR_REF (first_dr
)), 0));
5210 case dr_explicit_realign_optimized
:
5211 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5212 new_stmt
= gimple_build_assign_with_ops
5213 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5215 (TREE_TYPE (dataref_ptr
),
5216 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5217 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5219 = build2 (MEM_REF
, vectype
, new_temp
,
5220 build_int_cst (reference_alias_ptr_type
5221 (DR_REF (first_dr
)), 0));
5226 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5227 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5228 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5229 gimple_assign_set_lhs (new_stmt
, new_temp
);
5230 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5232 /* 3. Handle explicit realignment if necessary/supported.
5234 vec_dest = realign_load (msq, lsq, realignment_token) */
5235 if (alignment_support_scheme
== dr_explicit_realign_optimized
5236 || alignment_support_scheme
== dr_explicit_realign
)
5238 lsq
= gimple_assign_lhs (new_stmt
);
5239 if (!realignment_token
)
5240 realignment_token
= dataref_ptr
;
5241 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5243 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
5246 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5247 gimple_assign_set_lhs (new_stmt
, new_temp
);
5248 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5250 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5253 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5254 add_phi_arg (phi
, lsq
,
5255 loop_latch_edge (containing_loop
),
5261 /* 4. Handle invariant-load. */
5262 if (inv_p
&& !bb_vinfo
)
5264 gimple_stmt_iterator gsi2
= *gsi
;
5265 gcc_assert (!grouped_load
);
5267 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5269 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5274 tree perm_mask
= perm_mask_for_reverse (vectype
);
5275 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5276 perm_mask
, stmt
, gsi
);
5277 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5280 /* Collect vector loads and later create their permutation in
5281 vect_transform_grouped_load (). */
5282 if (grouped_load
|| slp_perm
)
5283 dr_chain
.quick_push (new_temp
);
5285 /* Store vector loads in the corresponding SLP_NODE. */
5286 if (slp
&& !slp_perm
)
5287 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5289 /* Bump the vector pointer to account for a gap. */
5290 if (slp
&& group_gap
!= 0)
5292 tree bump
= size_binop (MULT_EXPR
,
5293 TYPE_SIZE_UNIT (elem_type
),
5294 size_int (group_gap
));
5295 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5300 if (slp
&& !slp_perm
)
5305 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
5306 slp_node_instance
, false))
5308 dr_chain
.release ();
5317 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5318 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5323 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5325 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5326 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5329 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
5348 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5349 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5353 enum vect_def_type dt
;
5354 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5356 if (!COMPARISON_CLASS_P (cond
))
5359 lhs
= TREE_OPERAND (cond
, 0);
5360 rhs
= TREE_OPERAND (cond
, 1);
5362 if (TREE_CODE (lhs
) == SSA_NAME
)
5364 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5365 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5366 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5369 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5370 && TREE_CODE (lhs
) != FIXED_CST
)
5373 if (TREE_CODE (rhs
) == SSA_NAME
)
5375 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5376 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5377 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5380 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5381 && TREE_CODE (rhs
) != FIXED_CST
)
5384 *comp_vectype
= vectype1
? vectype1
: vectype2
;
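/* For example (illustrative only): for the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   vect_is_simple_cond checks the comparison a_2 < b_3 and returns its
   vector type in *COMP_VECTYPE; vectorizable_condition below then emits

     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;  */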
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5402 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5403 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5406 tree scalar_dest
= NULL_TREE
;
5407 tree vec_dest
= NULL_TREE
;
5408 tree cond_expr
, then_clause
, else_clause
;
5409 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5410 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5411 tree comp_vectype
= NULL_TREE
;
5412 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5413 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5414 tree vec_compare
, vec_cond_expr
;
5416 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5418 enum vect_def_type dt
, dts
[4];
5419 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5421 enum tree_code code
;
5422 stmt_vec_info prev_stmt_info
= NULL
;
5424 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5425 vec
<tree
> vec_oprnds0
= vNULL
;
5426 vec
<tree
> vec_oprnds1
= vNULL
;
5427 vec
<tree
> vec_oprnds2
= vNULL
;
5428 vec
<tree
> vec_oprnds3
= vNULL
;
5431 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5434 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5436 gcc_assert (ncopies
>= 1);
5437 if (reduc_index
&& ncopies
> 1)
5438 return false; /* FORNOW */
5440 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5443 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5446 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5447 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5451 /* FORNOW: not yet supported. */
5452 if (STMT_VINFO_LIVE_P (stmt_info
))
5454 if (dump_enabled_p ())
5455 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5456 "value used after loop.\n");
5460 /* Is vectorizable conditional operation? */
5461 if (!is_gimple_assign (stmt
))
5464 code
= gimple_assign_rhs_code (stmt
);
5466 if (code
!= COND_EXPR
)
5469 cond_expr
= gimple_assign_rhs1 (stmt
);
5470 then_clause
= gimple_assign_rhs2 (stmt
);
5471 else_clause
= gimple_assign_rhs3 (stmt
);
5473 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5478 if (TREE_CODE (then_clause
) == SSA_NAME
)
5480 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5481 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5482 &then_def_stmt
, &def
, &dt
))
5485 else if (TREE_CODE (then_clause
) != INTEGER_CST
5486 && TREE_CODE (then_clause
) != REAL_CST
5487 && TREE_CODE (then_clause
) != FIXED_CST
)
5490 if (TREE_CODE (else_clause
) == SSA_NAME
)
5492 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5493 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5494 &else_def_stmt
, &def
, &dt
))
5497 else if (TREE_CODE (else_clause
) != INTEGER_CST
5498 && TREE_CODE (else_clause
) != REAL_CST
5499 && TREE_CODE (else_clause
) != FIXED_CST
)
  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be of signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;
5511 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5512 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5519 vec_oprnds0
.create (1);
5520 vec_oprnds1
.create (1);
5521 vec_oprnds2
.create (1);
5522 vec_oprnds3
.create (1);
5526 scalar_dest
= gimple_assign_lhs (stmt
);
5527 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5529 /* Handle cond expr. */
5530 for (j
= 0; j
< ncopies
; j
++)
5532 gimple new_stmt
= NULL
;
5537 stack_vec
<tree
, 4> ops
;
5538 stack_vec
<vec
<tree
>, 4> vec_defs
;
5540 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
5541 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
5542 ops
.safe_push (then_clause
);
5543 ops
.safe_push (else_clause
);
5544 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5545 vec_oprnds3
= vec_defs
.pop ();
5546 vec_oprnds2
= vec_defs
.pop ();
5547 vec_oprnds1
= vec_defs
.pop ();
5548 vec_oprnds0
= vec_defs
.pop ();
5551 vec_defs
.release ();
5557 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5559 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5560 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5563 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5565 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5566 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5567 if (reduc_index
== 1)
5568 vec_then_clause
= reduc_def
;
5571 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5573 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5574 NULL
, >emp
, &def
, &dts
[2]);
5576 if (reduc_index
== 2)
5577 vec_else_clause
= reduc_def
;
5580 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5582 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5583 NULL
, >emp
, &def
, &dts
[3]);
5589 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5590 vec_oprnds0
.pop ());
5591 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5592 vec_oprnds1
.pop ());
5593 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5594 vec_oprnds2
.pop ());
5595 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5596 vec_oprnds3
.pop ());
5601 vec_oprnds0
.quick_push (vec_cond_lhs
);
5602 vec_oprnds1
.quick_push (vec_cond_rhs
);
5603 vec_oprnds2
.quick_push (vec_then_clause
);
5604 vec_oprnds3
.quick_push (vec_else_clause
);
5607 /* Arguments are ready. Create the new vector stmt. */
5608 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
5610 vec_cond_rhs
= vec_oprnds1
[i
];
5611 vec_then_clause
= vec_oprnds2
[i
];
5612 vec_else_clause
= vec_oprnds3
[i
];
5614 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
5615 vec_cond_lhs
, vec_cond_rhs
);
5616 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5617 vec_compare
, vec_then_clause
, vec_else_clause
);
5619 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5620 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5621 gimple_assign_set_lhs (new_stmt
, new_temp
);
5622 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5624 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5631 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5633 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5635 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5638 vec_oprnds0
.release ();
5639 vec_oprnds1
.release ();
5640 vec_oprnds2
.release ();
5641 vec_oprnds3
.release ();
5647 /* Make sure the statement is vectorizable. */
5650 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5652 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5653 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5654 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5656 tree scalar_type
, vectype
;
5657 gimple pattern_stmt
;
5658 gimple_seq pattern_def_seq
;
5660 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
5663 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5664 dump_printf (MSG_NOTE
, "\n");
5667 if (gimple_has_volatile_ops (stmt
))
5669 if (dump_enabled_p ())
5670 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5671 "not vectorized: stmt has volatile operands\n");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     The pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; don't analyze pattern stmts instead, as the pattern
     stmts will already be part of the SLP instance.  */
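  /* E.g. (illustrative): in a loop like

       for (i = 0; i < n; i++)
	 a[i] = b[i] + 1;

     the increment i++ and the exit test i < n are used only for loop
     control and stay scalar; only the load, the addition and the store
     are marked relevant and analyzed below.  */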
5690 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5691 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5692 && !STMT_VINFO_LIVE_P (stmt_info
))
5694 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5696 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5697 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5699 /* Analyze PATTERN_STMT instead of the original stmt. */
5700 stmt
= pattern_stmt
;
5701 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5702 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_NOTE
, vect_location
,
5705 "==> examining pattern statement: ");
5706 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5707 dump_printf (MSG_NOTE
, "\n");
5712 if (dump_enabled_p ())
5713 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
5718 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5721 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5722 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5724 /* Analyze PATTERN_STMT too. */
5725 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_NOTE
, vect_location
,
5728 "==> examining pattern statement: ");
5729 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5730 dump_printf (MSG_NOTE
, "\n");
5733 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5737 if (is_pattern_stmt_p (stmt_info
)
5739 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5741 gimple_stmt_iterator si
;
5743 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5745 gimple pattern_def_stmt
= gsi_stmt (si
);
5746 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5747 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5749 /* Analyze def stmt of STMT if it's a pattern stmt. */
5750 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_NOTE
, vect_location
,
5753 "==> examining pattern def statement: ");
5754 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
5755 dump_printf (MSG_NOTE
, "\n");
5758 if (!vect_analyze_stmt (pattern_def_stmt
,
5759 need_to_vectorize
, node
))
5765 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5767 case vect_internal_def
:
5770 case vect_reduction_def
:
5771 case vect_nested_cycle
:
5772 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5773 || relevance
== vect_used_in_outer_by_reduction
5774 || relevance
== vect_unused_in_scope
));
5777 case vect_induction_def
:
5778 case vect_constant_def
:
5779 case vect_external_def
:
5780 case vect_unknown_def_type
:
5787 gcc_assert (PURE_SLP_STMT (stmt_info
));
5789 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5790 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE
, vect_location
,
5793 "get vectype for scalar type: ");
5794 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
5795 dump_printf (MSG_NOTE
, "\n");
5798 vectype
= get_vectype_for_scalar_type (scalar_type
);
5801 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5804 "not SLPed: unsupported data-type ");
5805 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5807 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5812 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
5815 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
5816 dump_printf (MSG_NOTE
, "\n");
5819 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5822 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5824 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5825 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5826 *need_to_vectorize
= true;
5831 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5832 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5833 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5834 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5835 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5836 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5837 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5838 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5839 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5840 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5841 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5845 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5846 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5847 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5848 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5849 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5850 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5851 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5852 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5857 if (dump_enabled_p ())
5859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5860 "not vectorized: relevant stmt not ");
5861 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5862 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5863 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
  /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
     need extra handling, except for vectorizable reductions.  */
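  /* A "live" stmt is, e.g. (illustrative), one whose value is still read
     after the loop, such as the final value of last in

       for (i = 0; i < n; i++)
	 last = a[i];
       use (last);

     vectorizable_live_operation decides whether such a use can be
     handled.  */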
5874 if (STMT_VINFO_LIVE_P (stmt_info
)
5875 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5876 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5880 if (dump_enabled_p ())
5882 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5883 "not vectorized: live stmt not ");
5884 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5885 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5886 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5896 /* Function vect_transform_stmt.
5898 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5901 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5902 bool *grouped_store
, slp_tree slp_node
,
5903 slp_instance slp_node_instance
)
5905 bool is_store
= false;
5906 gimple vec_stmt
= NULL
;
5907 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5910 switch (STMT_VINFO_TYPE (stmt_info
))
5912 case type_demotion_vec_info_type
:
5913 case type_promotion_vec_info_type
:
5914 case type_conversion_vec_info_type
:
5915 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5919 case induc_vec_info_type
:
5920 gcc_assert (!slp_node
);
5921 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5925 case shift_vec_info_type
:
5926 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5930 case op_vec_info_type
:
5931 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5935 case assignment_vec_info_type
:
5936 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5940 case load_vec_info_type
:
5941 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5946 case store_vec_info_type
:
5947 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their stmt_vec_info shouldn't be freed
	     meanwhile.  */
5955 *grouped_store
= true;
5956 if (STMT_VINFO_VEC_STMT (stmt_info
))
5963 case condition_vec_info_type
:
5964 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5968 case call_vec_info_type
:
5969 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5970 stmt
= gsi_stmt (*gsi
);
5973 case reduc_vec_info_type
:
5974 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5979 if (!STMT_VINFO_LIVE_P (stmt_info
))
5981 if (dump_enabled_p ())
5982 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5983 "stmt not supported.\n");
5988 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5989 is being vectorized, but outside the immediately enclosing loop. */
5991 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5992 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5993 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5994 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5995 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5996 || STMT_VINFO_RELEVANT (stmt_info
) ==
5997 vect_used_in_outer_by_reduction
))
5999 struct loop
*innerloop
= LOOP_VINFO_LOOP (
6000 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
6001 imm_use_iterator imm_iter
;
6002 use_operand_p use_p
;
6006 if (dump_enabled_p ())
6007 dump_printf_loc (MSG_NOTE
, vect_location
,
6008 "Record the vdef for outer-loop vectorization.\n");
	  /* Find the relevant loop-exit phi-node, and record the vec_stmt
	     there (to be used when vectorizing outer-loop stmts that use the
	     DEF of STMT).  */
	  if (gimple_code (stmt) == GIMPLE_PHI)
6014 scalar_dest
= PHI_RESULT (stmt
);
6016 scalar_dest
= gimple_assign_lhs (stmt
);
6018 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
6020 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
6022 exit_phi
= USE_STMT (use_p
);
6023 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
6028 /* Handle stmts whose DEF is used outside the loop-nest that is
6029 being vectorized. */
6030 if (STMT_VINFO_LIVE_P (stmt_info
)
6031 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
6033 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
6038 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
6050 gimple next
= first_stmt
;
6052 gimple_stmt_iterator next_si
;
6056 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
6058 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
6059 if (is_pattern_stmt_p (stmt_info
))
6060 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
6061 /* Free the attached stmt_vec_info and remove the stmt. */
6062 next_si
= gsi_for_stmt (next
);
6063 unlink_stmt_vdef (next
);
6064 gsi_remove (&next_si
, true);
6065 release_defs (next
);
6066 free_stmt_vec_info (next
);
6072 /* Function new_stmt_vec_info.
6074 Create and initialize a new stmt_vec_info struct for STMT. */
6077 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
6078 bb_vec_info bb_vinfo
)
6081 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
6083 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
6084 STMT_VINFO_STMT (res
) = stmt
;
6085 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
6086 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
6087 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
6088 STMT_VINFO_LIVE_P (res
) = false;
6089 STMT_VINFO_VECTYPE (res
) = NULL
;
6090 STMT_VINFO_VEC_STMT (res
) = NULL
;
6091 STMT_VINFO_VECTORIZABLE (res
) = true;
6092 STMT_VINFO_IN_PATTERN_P (res
) = false;
6093 STMT_VINFO_RELATED_STMT (res
) = NULL
;
6094 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
6095 STMT_VINFO_DATA_REF (res
) = NULL
;
6097 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
6098 STMT_VINFO_DR_OFFSET (res
) = NULL
;
6099 STMT_VINFO_DR_INIT (res
) = NULL
;
6100 STMT_VINFO_DR_STEP (res
) = NULL
;
6101 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
6103 if (gimple_code (stmt
) == GIMPLE_PHI
6104 && is_loop_header_bb_p (gimple_bb (stmt
)))
6105 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
6107 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
6109 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
6110 STMT_SLP_TYPE (res
) = loop_vect
;
6111 GROUP_FIRST_ELEMENT (res
) = NULL
;
6112 GROUP_NEXT_ELEMENT (res
) = NULL
;
6113 GROUP_SIZE (res
) = 0;
6114 GROUP_STORE_COUNT (res
) = 0;
6115 GROUP_GAP (res
) = 0;
6116 GROUP_SAME_DR_STMT (res
) = NULL
;
/* Create a vector for stmt_vec_info.  */
6125 init_stmt_vec_info_vec (void)
6127 gcc_assert (!stmt_vec_info_vec
.exists ());
6128 stmt_vec_info_vec
.create (50);
/* Free the vector of stmt_vec_info.  */
6135 free_stmt_vec_info_vec (void)
6139 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
6141 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
6142 gcc_assert (stmt_vec_info_vec
.exists ());
6143 stmt_vec_info_vec
.release ();
6147 /* Free stmt vectorization related info. */
6150 free_stmt_vec_info (gimple stmt
)
6152 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6157 /* Check if this statement has a related "pattern stmt"
6158 (introduced by the vectorizer during the pattern recognition
6159 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6161 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
6163 stmt_vec_info patt_info
6164 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6167 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
6170 gimple_stmt_iterator si
;
6171 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
6172 free_stmt_vec_info (gsi_stmt (si
));
6174 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
6178 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
6179 set_vinfo_for_stmt (stmt
, NULL
);
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6192 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
6193 enum machine_mode simd_mode
;
6194 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
6201 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
6202 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
6211 if (INTEGRAL_TYPE_P (scalar_type
)
6212 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
6213 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
6214 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
6215 TYPE_UNSIGNED (scalar_type
));
6217 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6218 When the component mode passes the above test simply use a type
6219 corresponding to that mode. The theory is that any use that
6220 would cause problems with this will disable vectorization anyway. */
6221 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
6222 && !INTEGRAL_TYPE_P (scalar_type
))
6223 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
6225 /* We can't build a vector type of elements with alignment bigger than
6227 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
6228 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
6229 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
6233 if (scalar_type
== NULL_TREE
)
6236 /* If no size was supplied use the mode the target prefers. Otherwise
6237 lookup a vector mode of the specified size. */
6239 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6241 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6242 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6246 vectype
= build_vector_type (scalar_type
, nunits
);
6248 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6249 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
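
/* Worked example (editorial, not part of the original source): on a target
   whose preferred SIMD width is 16 bytes, a 4-byte 'int' element gives
   nunits = GET_MODE_SIZE (simd_mode) / nbytes = 16 / 4 = 4, so the routine
   builds a 4-element integer vector type (V4SImode on most such targets).
   Passing an explicit SIZE of 32 instead asks mode_for_vector for a 32-byte
   mode and yields an 8-element type when the target supports one.  */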
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same
   size as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
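
/* Editorial usage sketch (not part of the original source): the first call
   to get_vectype_for_scalar_type latches current_vector_size, so later calls
   pick element counts that keep every vector the same number of bytes;
   get_same_sized_vectype is the explicit form of that, e.g.

     tree vsi = get_vectype_for_scalar_type (integer_type_node);
     tree vhi = get_same_sized_vectype (short_integer_type_node, vsi);

   where vhi, if the target supports it, has twice as many elements as vsi
   but occupies the same number of bytes.  Either call may return NULL_TREE
   when no suitable vector mode exists.  */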
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
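
/* Editorial usage sketch (not part of the original source): a typical caller
   classifies each operand of a statement before deciding whether the
   statement can be vectorized, e.g.

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt))
       return false;
     if (dt == vect_constant_def || dt == vect_external_def)
       ... the operand can simply be broadcast into a vector ...

   which mirrors how the analysis routines elsewhere in this file use it.  */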
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
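
/* Editorial usage sketch (not part of the original source): callers that
   need the operand's vector type use this wrapper roughly as

     gimple def_stmt;
     tree def, vectype;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &vectype))
       return false;
     if (vectype == NULL_TREE)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   i.e. a NULL_TREE result signals a constant or external def whose vector
   type the caller must choose itself.  */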
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
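
/* Editorial usage sketch (not part of the original source): when analyzing a
   widening conversion the vectorizer asks the target whether the widening is
   supported directly or as a multi-step sequence, roughly as

     enum tree_code code1, code2;
     int multi_step;
     vec<tree> interm_types = vNULL;

     if (supportable_widening_operation (NOP_EXPR, stmt,
                                         vectype_out, vectype_in,
                                         &code1, &code2, &multi_step,
                                         &interm_types))
       ... emit multi_step + 1 widening steps through interm_types ...

   so a char->short->int promotion comes back with multi_step == 1 and the
   short vector type in interm_types.  */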
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
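
/* Editorial usage sketch (not part of the original source): the narrowing
   counterpart of the query above, used when a statement narrows its
   operands, roughly as

     enum tree_code code1;
     int multi_step;
     vec<tree> interm_types = vNULL;

     if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                          &code1, &multi_step, &interm_types))
       ... emit multi_step + 1 VEC_PACK steps through interm_types ...

   so an int->short->char demotion on a typical target reports
   multi_step == 1 with the short vector type as the single intermediate.  */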