/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}

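/* A minimal usage sketch: during analysis, callers typically accumulate
   costs into a cost vector, e.g.

     record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
		       stmt_info, 0, vect_prologue);

   whereas passing a NULL cost vector (as done at transform time) hands
   the cost directly to the target via add_stmt_cost.  */
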
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is out of pattern use, if LHS has other uses that are
	     pattern uses, we should mark the stmt itself, and not the pattern
	     stmt.  */
	  if (TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}

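/* For illustration: in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;	    <-- relevant: alters memory (has a vdef)

   the store is relevant, and a stmt whose result is also used after the
   loop would additionally be marked live; an index computation feeding
   only array addressing ends up neither.  */
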
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

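      /* For illustration: in a sum reduction s_new = s_old + (a[i] * b[i]),
	 the multiplication is used only by the reduction, so its partial
	 results may be computed in any order; a value that is also stored
	 to memory would not get that freedom.  */
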
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

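/* For illustration, given that vect_pow2 (N) is 2**N: a two-step
   promotion (PWR == 1) charges vect_pow2 (1) + vect_pow2 (2) = 6
   vec_promote_demote stmts per copy, while the matching two-step
   demotion charges vect_pow2 (0) + vect_pow2 (1) = 3.  */
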
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */

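  /* For illustration: with GROUP_SIZE == 4 and NCOPIES == 2, the
     interleaving scheme below charges 2 * log2 (4) * 4 = 16 vec_perm
     stmts in addition to the stores themselves.  */
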
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */

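  /* For illustration: loading two interleaved streams (GROUP_SIZE == 2,
     e.g. a[2*i] and a[2*i+1]) with NCOPIES == 1 charges
     1 * log2 (2) * 2 = 2 vec_perm stmts - one even-element and one
     odd-element extract - on top of the vector loads.  */
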
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

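/* For illustration: given a V4SI TYPE and a scalar VAL of 5, this emits
   roughly

     cst_1 = {5, 5, 5, 5};

   in the loop preheader (or at GSI) and returns the new SSA name.  */
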
tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, "  def_stmt =  ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
						   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}

1692 /* Function vectorizable_call.
1694 Check if STMT performs a function call that can be vectorized.
1695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1696 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1697 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1700 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1706 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1707 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1708 tree vectype_out
, vectype_in
;
1711 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1712 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1713 tree fndecl
, new_temp
, def
, rhs_type
;
1715 enum vect_def_type dt
[3]
1716 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1717 gimple new_stmt
= NULL
;
1719 vec
<tree
> vargs
= vNULL
;
1720 enum { NARROW
, NONE
, WIDEN
} modifier
;
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1727 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1730 /* Is STMT a vectorizable call? */
1731 if (!is_gimple_call (stmt
))
1734 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1737 if (stmt_can_throw_internal (stmt
))
1740 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1742 /* Process function arguments. */
1743 rhs_type
= NULL_TREE
;
1744 vectype_in
= NULL_TREE
;
1745 nargs
= gimple_call_num_args (stmt
);
1747 /* Bail out if the function has more than three arguments, we do not have
1748 interesting builtin functions to vectorize with more than two arguments
1749 except for fma. No arguments is also not good. */
1750 if (nargs
== 0 || nargs
> 3)
1753 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1754 if (gimple_call_internal_p (stmt
)
1755 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1758 rhs_type
= unsigned_type_node
;
1761 for (i
= 0; i
< nargs
; i
++)
1765 op
= gimple_call_arg (stmt
, i
);
1767 /* We can only handle calls with arguments of the same type. */
1769 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1773 "argument types differ.\n");
1777 rhs_type
= TREE_TYPE (op
);
1779 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1780 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1784 "use not simple.\n");
1789 vectype_in
= opvectype
;
1791 && opvectype
!= vectype_in
)
1793 if (dump_enabled_p ())
1794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1795 "argument vector types differ.\n");
1799 /* If all arguments are external or constant defs use a vector type with
1800 the same size as the output vector type. */
1802 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1804 gcc_assert (vectype_in
);
1807 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1810 "no vectype for scalar type ");
1811 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
1812 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1819 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1820 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1821 if (nunits_in
== nunits_out
/ 2)
1823 else if (nunits_out
== nunits_in
)
1825 else if (nunits_out
== nunits_in
/ 2)
1830 /* For now, we only vectorize functions if a target specific builtin
1831 is available. TODO -- in some cases, it might be profitable to
1832 insert the calls for pieces of the vector, in order to be able
1833 to vectorize other operations in the loop. */
1834 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1835 if (fndecl
== NULL_TREE
)
1837 if (gimple_call_internal_p (stmt
)
1838 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
1841 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1842 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
1843 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1844 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
1846 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1847 { 0, 1, 2, ... vf - 1 } vector. */
1848 gcc_assert (nargs
== 0);
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1854 "function is not vectorizable.\n");
1859 gcc_assert (!gimple_vuse (stmt
));
1861 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1863 else if (modifier
== NARROW
)
1864 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1866 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1868 /* Sanity check: make sure that at least one copy of the vectorized stmt
1869 needs to be generated. */
1870 gcc_assert (ncopies
>= 1);
1872 if (!vec_stmt
) /* transformation not required. */
1874 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
1878 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1884 if (dump_enabled_p ())
1885 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
1888 scalar_dest
= gimple_call_lhs (stmt
);
1889 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1891 prev_stmt_info
= NULL
;
1895 for (j
= 0; j
< ncopies
; ++j
)
1897 /* Build argument list for the vectorized call. */
1899 vargs
.create (nargs
);
1905 vec
<vec
<tree
> > vec_defs
;
1906 vec_defs
.create (nargs
);
1907 vec
<tree
> vec_oprnds0
;
1909 for (i
= 0; i
< nargs
; i
++)
1910 vargs
.quick_push (gimple_call_arg (stmt
, i
));
1911 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1912 vec_oprnds0
= vec_defs
[0];
1914 /* Arguments are ready. Create the new vector stmt. */
1915 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
1918 for (k
= 0; k
< nargs
; k
++)
1920 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
1921 vargs
[k
] = vec_oprndsk
[i
];
1923 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1924 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1925 gimple_call_set_lhs (new_stmt
, new_temp
);
1926 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1927 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
1930 for (i
= 0; i
< nargs
; i
++)
1932 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
1933 vec_oprndsi
.release ();
1935 vec_defs
.release ();
1939 for (i
= 0; i
< nargs
; i
++)
1941 op
= gimple_call_arg (stmt
, i
);
1944 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1947 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1949 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1952 vargs
.quick_push (vec_oprnd0
);
1955 if (gimple_call_internal_p (stmt
)
1956 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1958 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
1960 for (k
= 0; k
< nunits_out
; ++k
)
1961 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
1962 tree cst
= build_vector (vectype_out
, v
);
1964 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
1965 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
1966 new_temp
= make_ssa_name (new_var
, init_stmt
);
1967 gimple_assign_set_lhs (init_stmt
, new_temp
);
1968 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
1969 new_temp
= make_ssa_name (vec_dest
, NULL
);
1970 new_stmt
= gimple_build_assign (new_temp
,
1971 gimple_assign_lhs (init_stmt
));
1975 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1976 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1977 gimple_call_set_lhs (new_stmt
, new_temp
);
1979 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1982 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1984 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1986 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
	}
      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      vec<vec<tree> > vec_defs;
	      vec_defs.create (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      vec_defs.release ();
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
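
/* Illustrative note (not from the original sources; the builtin name is
   hypothetical and target-dependent): for a scalar call

     x_1 = copysignf (a_2, b_3);

   the NONE case above emits a vector call such as

     vx_1 = __builtin_target_copysignps (va_2, vb_3);

   and the scalar stmt is then rewritten to the harmless  x_1 = 0.0f;
   so that dce can remove it without disturbing the SSA name mapping.  */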
2113 /* Function vect_gen_widened_results_half
2115 Create a vector stmt whose code, type, number of arguments, and result
2116 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2117 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2118 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2119 needs to be created (DECL is a function-decl of a target-builtin).
2120 STMT is the original scalar stmt that we are vectorizing. */
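
/* Illustrative example (assumed, not part of the original comment): for a
   widening multiply of V8HI operands into V4SI results, this helper is
   invoked twice, once with VEC_WIDEN_MULT_LO_EXPR and once with
   VEC_WIDEN_MULT_HI_EXPR, each call producing one V4SI half of the
   widened result.  */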

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
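
/* For example (illustrative): a call with MULTI_STEP_CVT == 1 collects
   four defs into VEC_OPRNDS - the two pushed above plus two more from
   the recursive call - matching the 2^(MULTI_STEP_CVT+1) input vectors
   consumed by a two-step narrowing.  */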

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
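
/* A sketch of a two-step demotion (illustrative; types assumed): four
   V4SI vectors are narrowed to one V16QI vector via V8HI:

     vh_0 = VEC_PACK_TRUNC_EXPR <vs_0, vs_1>;
     vh_1 = VEC_PACK_TRUNC_EXPR <vs_2, vs_3>;
     vq_0 = VEC_PACK_TRUNC_EXPR <vh_0, vh_1>;

   The loop above emits the first level; the recursive call, always with
   VEC_PACK_TRUNC_EXPR, emits the remaining level.  */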

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
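
/* A sketch of the promotion direction (illustrative; types assumed):
   widening one V16QI vector yields two V8HI vectors, e.g.

     vh_lo = [vec_unpack_lo_expr] vq_0;
     vh_hi = [vec_unpack_hi_expr] vq_0;

   which is why VEC_TMP is created with twice the length of VEC_OPRNDS0
   and replaces it on return.  */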

/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
							   vop0, NULL);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest, NULL);
		      new_stmt = gimple_build_assign_with_ops (codecvt1,
							       new_temp,
							       vop0, NULL);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest, NULL);
		    new_stmt = gimple_build_assign_with_ops (codecvt1,
							     new_temp,
							     vop0, NULL);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
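
/* Examples of the three modifiers handled above (illustrative only):

     NONE:    vf_4 = (float) vi_4      V4SI -> V4SF, same nunits;
     WIDEN:   vd_2 = (double) vi_4     one V4SI feeds two V2DF stmts;
     NARROW:  vs_8 = (short) vi_4      two V4SI feed one V8HI stmt.  */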

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
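
/* Illustrative example: a conversion between signed and unsigned int,

     u_1 = (unsigned int) i_2;

   changes neither nunits nor the vector size, so it is vectorized here
   as a plain copy through a VIEW_CONVERT_EXPR of the V4SI operand.  */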

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{

  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
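
/* Usage sketch (illustrative): pattern recognizers call
   vect_supportable_shift (RSHIFT_EXPR, <scalar type>) to check that the
   target provides either a vector-shift-by-scalar or a
   vector-shift-by-vector optab for the vectype chosen for that scalar
   type, before committing to a pattern that needs the shift.  */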

/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
		    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			     &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
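
/* Illustrative example of the two flavours handled above:

     vector/scalar:  vx_1 = vx_0 >> 3     invariant amount; operand 2
					   stays scalar if the insn
					   pattern allows it;
     vector/vector:  vx_1 = vx_0 >> vy_0  per-element shift amounts.

   The choice is made during analysis from dt[1] and the available
   optabs.  */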

static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);

/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
	icode = LAST_INSN_CODE;
      else
	icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  if (op_type == ternary_op)
	    {
	      vec_oprnds2.create (1);
	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
								    stmt,
								    NULL));
	    }
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
						   vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
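
/* For example (illustrative): if the chosen vectype is V4SF but the
   decl backing DR was only element-aligned, the code above raises the
   base decl's DECL_ALIGN to TYPE_ALIGN (V4SF) so the vectorized loop
   may assume an aligned base address.  */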
3836 /* Function vectorizable_store.
3838 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3840 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3841 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3842 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3845 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3851 tree vec_oprnd
= NULL_TREE
;
3852 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3853 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3854 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3856 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3857 struct loop
*loop
= NULL
;
3858 enum machine_mode vec_mode
;
3860 enum dr_alignment_support alignment_support_scheme
;
3863 enum vect_def_type dt
;
3864 stmt_vec_info prev_stmt_info
= NULL
;
3865 tree dataref_ptr
= NULL_TREE
;
3866 tree dataref_offset
= NULL_TREE
;
3867 gimple ptr_incr
= NULL
;
3868 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3871 gimple next_stmt
, first_stmt
= NULL
;
3872 bool grouped_store
= false;
3873 bool store_lanes_p
= false;
3874 unsigned int group_size
, i
;
3875 vec
<tree
> dr_chain
= vNULL
;
3876 vec
<tree
> oprnds
= vNULL
;
3877 vec
<tree
> result_chain
= vNULL
;
3879 vec
<tree
> vec_oprnds
= vNULL
;
3880 bool slp
= (slp_node
!= NULL
);
3881 unsigned int vec_num
;
3882 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3886 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3888 /* Multiple types in SLP are handled by creating the appropriate number of
3889 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3891 if (slp
|| PURE_SLP_STMT (stmt_info
))
3894 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3896 gcc_assert (ncopies
>= 1);
3898 /* FORNOW. This restriction should be relaxed. */
3899 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3901 if (dump_enabled_p ())
3902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3903 "multiple types in nested loop.\n");
3907 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3910 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3913 /* Is vectorizable store? */
3915 if (!is_gimple_assign (stmt
))
3918 scalar_dest
= gimple_assign_lhs (stmt
);
3919 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3920 && is_pattern_stmt_p (stmt_info
))
3921 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3922 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3923 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
3924 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3925 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3926 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3927 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3928 && TREE_CODE (scalar_dest
) != MEM_REF
)
3931 gcc_assert (gimple_assign_single_p (stmt
));
3932 op
= gimple_assign_rhs1 (stmt
);
3933 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3936 if (dump_enabled_p ())
3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3938 "use not simple.\n");
3942 elem_type
= TREE_TYPE (vectype
);
3943 vec_mode
= TYPE_MODE (vectype
);
3945 /* FORNOW. In some cases can vectorize even if data-type not supported
3946 (e.g. - array initialization with 0). */
3947 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3950 if (!STMT_VINFO_DATA_REF (stmt_info
))
3953 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3954 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3955 size_zero_node
) < 0)
3957 if (dump_enabled_p ())
3958 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3959 "negative step for store.\n");
3963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3965 grouped_store
= true;
3966 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3967 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3969 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3970 if (vect_store_lanes_supported (vectype
, group_size
))
3971 store_lanes_p
= true;
3972 else if (!vect_grouped_store_supported (vectype
, group_size
))
3976 if (first_stmt
== stmt
)
3978 /* STMT is the leader of the group. Check the operands of all the
3979 stmts of the group. */
3980 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3983 gcc_assert (gimple_assign_single_p (next_stmt
));
3984 op
= gimple_assign_rhs1 (next_stmt
);
3985 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3986 &def_stmt
, &def
, &dt
))
3988 if (dump_enabled_p ())
3989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3990 "use not simple.\n");
3993 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3998 if (!vec_stmt
) /* transformation not required. */
4000 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
4001 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
4008 ensure_base_align (stmt_info
, dr
);
4012 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4013 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4015 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
4018 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
4020 /* We vectorize all the stmts of the interleaving group when we
4021 reach the last stmt in the group. */
4022 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
4023 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
4032 grouped_store
= false;
4033 /* VEC_NUM is the number of vect stmts to be created for this
4035 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4036 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4037 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4038 op
= gimple_assign_rhs1 (first_stmt
);
4041 /* VEC_NUM is the number of vect stmts to be created for this
4043 vec_num
= group_size
;
4049 group_size
= vec_num
= 1;
4052 if (dump_enabled_p ())
4053 dump_printf_loc (MSG_NOTE
, vect_location
,
4054 "transform store. ncopies = %d\n", ncopies
);
4056 dr_chain
.create (group_size
);
4057 oprnds
.create (group_size
);
4059 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4060 gcc_assert (alignment_support_scheme
);
4061 /* Targets with store-lane instructions must not require explicit
4063 gcc_assert (!store_lanes_p
4064 || alignment_support_scheme
== dr_aligned
4065 || alignment_support_scheme
== dr_unaligned_supported
);
4068 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4070 aggr_type
= vectype
;
4072 /* In case the vectorization factor (VF) is bigger than the number
4073 of elements that we can fit in a vectype (nunits), we have to generate
4074 more than one vector stmt - i.e - we need to "unroll" the
4075 vector stmt by a factor VF/nunits. For more details see documentation in
4076 vect_get_vec_def_for_copy_stmt. */
4078 /* In case of interleaving (non-unit grouped access):
4085 We create vectorized stores starting from base address (the access of the
4086 first stmt in the chain (S2 in the above example), when the last store stmt
4087 of the chain (S4) is reached:
4090 VS2: &base + vec_size*1 = vx0
4091 VS3: &base + vec_size*2 = vx1
4092 VS4: &base + vec_size*3 = vx3
4094 Then permutation statements are generated:
4096 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4097 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4100 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4101 (the order of the data-refs in the output of vect_permute_store_chain
4102 corresponds to the order of scalar stmts in the interleaving chain - see
4103 the documentation of vect_permute_store_chain()).
4105 In case of both multiple types and interleaving, above vector stores and
4106 permutation stmts are created for every copy. The result vector stmts are
4107 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4108 STMT_VINFO_RELATED_STMT for the next copies.
4111 prev_stmt_info
= NULL
;
4112 for (j
= 0; j
< ncopies
; j
++)
4120 /* Get vectorized arguments for SLP_NODE. */
4121 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4122 NULL
, slp_node
, -1);
4124 vec_oprnd
= vec_oprnds
[0];
4128 /* For interleaved stores we collect vectorized defs for all the
4129 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4130 used as an input to vect_permute_store_chain(), and OPRNDS as
4131 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4133 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4134 OPRNDS are of size 1. */
4135 next_stmt
= first_stmt
;
4136 for (i
= 0; i
< group_size
; i
++)
4138 /* Since gaps are not supported for interleaved stores,
4139 GROUP_SIZE is the exact number of stmts in the chain.
4140 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4141 there is no interleaving, GROUP_SIZE is 1, and only one
4142 iteration of the loop will be executed. */
4143 gcc_assert (next_stmt
4144 && gimple_assign_single_p (next_stmt
));
4145 op
= gimple_assign_rhs1 (next_stmt
);
4147 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4149 dr_chain
.quick_push (vec_oprnd
);
4150 oprnds
.quick_push (vec_oprnd
);
4151 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          NULL_TREE, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }
      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }
          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
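/* Usage sketch (illustrative, not from the original sources): for a
   4-element vector type the loop above builds sel = {3, 2, 1, 0}, so a
   successful call yields a mask implementing

      VEC_PERM_EXPR <x, x, {3, 2, 1, 0}>   ==>  x with its elements reversed,

   and NULL if can_vec_perm_p rejects that selector for the vector mode.  */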
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                            x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
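/* For example (illustrative), permuting X and Y with the reversal mask
   built by perm_mask_for_reverse emits gimple of the form

      perm_dest_1 = VEC_PERM_EXPR <x_2, y_3, {3, 2, 1, 0}>;

   and returns the new SSA name perm_dest_1.  */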
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /* Transform.  */

  ensure_base_align (stmt_info, dr);
  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else
        gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
      mask = build_vector_from_val (masktype, mask);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
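  /* Worked example (illustrative, not from the original sources): a gather
     producing a 4-element data vector from an 8-element index vector has
     nunits == 4 and gather_off_nunits == 8, so the WIDEN case applies and
     sel[i] = i | 4 gives {4, 5, 6, 7, 4, 5, 6, 7} - for every odd copy J
     the permutation brings the high half of the index vector into position
     for the second gather call.  */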
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...  */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
                                                   running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
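  /* Numeric illustration (not in the original sources): with VF == 4 and a
     scalar stride of 12 bytes, IVSTEP becomes 4 * 12 == 48, so OFFVAR
     advances by 48 bytes per vector iteration while the inner I-loop above
     re-adds the 12-byte STRIDE_STEP between the NUNITS scalar loads that
     make up one vector.  */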
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ??? But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ??? With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
            slp_perm = true;
          group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
      else
        {
          vec_num = group_size;
          group_gap = 0;
        }
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap = 0;
    }
  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
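  /* Concretely (illustrative, not from the original sources): for a
     2-element group loaded into V4SI vectors vx0 = {a0,b0,a1,b1} and
     vx1 = {a2,b2,a3,b3}, the even/odd extraction masks are {0, 2, 4, 6}
     and {1, 3, 5, 7}:

        VS5: vx5 = VEC_PERM_EXPR <vx0, vx1, {0, 2, 4, 6}>  ==> {a0,a1,a2,a3}
        VS6: vx6 = VEC_PERM_EXPR <vx0, vx1, {1, 3, 5, 7}>  ==> {b0,b1,b2,b3}  */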
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }
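  /* For instance (illustrative): with 16-byte vectors and p1 == base + 4,
     floor(p1) reads the aligned vector covering bytes [base, base+16), the
     next aligned load covers [base+16, base+32), and realign_load combines
     the two according to the 4-byte misalignment encoded in
     realignment_token to recover the 16 bytes starting at p1.  */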
  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr)))
              && (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
        }
      else if (dataref_offset)
        dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                          TYPE_SIZE_UNIT (aggr_type));
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain.create (vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = build2 (MEM_REF, vectype, dataref_ptr,
                                dataref_offset
                                ? dataref_offset
                                : build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        align = TYPE_ALIGN_UNIT (elem_type);
                        misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    if (dataref_offset == NULL_TREE)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;
                    tree vs_minus_1;

                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    ptr = copy_ssa_name (dataref_ptr, NULL);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, ptr, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs_minus_1,
                                       TYPE_SIZE_UNIT (elem_type));
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (dataref_ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  new_temp = copy_ssa_name (dataref_ptr, NULL);
                  new_stmt = gimple_build_assign_with_ops
                               (BIT_AND_EXPR, new_temp, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt
                    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
                                                    vec_dest, msq, lsq,
                                                    realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gimple_stmt_iterator gsi2 = *gsi;
                  gcc_assert (!grouped_load);
                  gsi_next (&gsi2);
                  new_temp = vect_init_vector (stmt, scalar_dest,
                                               vectype, &gsi2);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap.  */
          if (slp && group_gap != 0)
            {
              tree bump = size_binop (MULT_EXPR,
                                      TYPE_SIZE_UNIT (elem_type),
                                      size_int (group_gap));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
                     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
                                 &lhs_def_stmt, &def, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
                                 &rhs_def_stmt, &def, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
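/* Example (illustrative, not from the original sources): for the scalar
   statement

      x_5 = a_2 < b_3 ? c_4 : d_6;

   COND is "a_2 < b_3"; both operands are SSA_NAMEs, so *COMP_VECTYPE
   becomes the vector type derived from a_2's (or b_3's) definition.  */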
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
                            &comp_vectype))
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              stack_vec<tree, 4> ops;
              stack_vec<vec<tree>, 4> vec_defs;

              ops.safe_push (TREE_OPERAND (cond_expr, 0));
              ops.safe_push (TREE_OPERAND (cond_expr, 1));
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              vec_defs.release ();
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                                         vec_oprnds0.pop ());
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                                         vec_oprnds1.pop ());
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = vec_oprnds1[i];
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
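/* Illustrative result (not from the original sources): vectorizing
   x = a < b ? c : d with V4SI operands produces, per copy,

      x_v = VEC_COND_EXPR <a_v < b_v, c_v, d_v>;

   where the embedded comparison a_v < b_v is given the signed integer
   vector type VEC_CMP_TYPE computed above.  */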
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                  dump_printf (MSG_NOTE, "\n");
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                                || relevance == vect_used_in_outer_by_reduction
                                || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else if (bb_vinfo)
    ok = (vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
             vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}

/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                free_stmt_vec_info (gsi_stmt (si));
            }
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
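/* For illustration only (the exact result is target-dependent): on a
   target whose preferred SImode SIMD mode is V4SImode, a call such as

     tree v = get_vectype_for_scalar_type_and_size (intSI_type_node, 0);

   yields a "vector(4) int", while passing SIZE == 32 on a target that
   provides 32-byte vectors would look up V8SImode instead.  */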
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;

  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));

  return vectype;
}
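/* A sketch of the size-caching behavior, assuming a target with 16-byte
   vectors: the first successful call fixes current_vector_size, and
   subsequent calls reuse it so that all vector types in one
   vectorization share a single vector size:

     tree v4si = get_vectype_for_scalar_type (intSI_type_node);
       ==> "vector(4) int"; current_vector_size becomes 16
     tree v8hi = get_vectype_for_scalar_type (short_integer_type_node);
       ==> "vector(8) short int", reusing the cached 16-byte size  */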
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
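/* Sketch of how a caller is expected to handle the NULL_TREE case
   (hypothetical code): when the definition is internal the vector type
   comes back ready to use; otherwise the caller derives one from the
   operand's scalar type:

     tree vectype;
     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));  */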
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
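/* Worked example (target-dependent, for illustration): a conversion from
   char to int, i.e. V16QI operands producing V4SI results, cannot be
   widened in a single step.  On success the outputs would describe a
   two-stage unpack:

     *code1 = VEC_UNPACK_LO_EXPR, *code2 = VEC_UNPACK_HI_EXPR,
     *multi_step_cvt = 1,
     *interm_types = { the short-based vector type, e.g. V8HI }

   matching the char->short->int example in the comment above.  */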
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
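/* Worked example, mirroring the widening case above (target-dependent):
   narrowing int to char, i.e. V4SI operands producing V16QI results,
   would on success return *code1 = VEC_PACK_TRUNC_EXPR with
   *multi_step_cvt = 1 and a short-based intermediate vector type in
   *interm_types; each step packs two input vectors into one vector of
   narrower elements.  */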