1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
35 #include "recog.h" /* FIXME: for insn_data */
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
47 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
49 return STMT_VINFO_VECTYPE (stmt_info
);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
55 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
57 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
58 basic_block bb
= gimple_bb (stmt
);
59 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
65 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
67 return (bb
->loop_father
== loop
->inner
);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
75 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
76 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
77 int misalign
, enum vect_cost_model_location where
)
81 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
82 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
83 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
86 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
91 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
92 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
93 void *target_cost_data
;
96 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
98 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
100 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
184 enum vect_relevant relevant
, bool live_p
,
185 bool used_in_pattern
)
187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
188 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
189 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
192 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE
, vect_location
,
194 "mark relevant %d, live %d.\n", relevant
, live_p
);
196 /* If this stmt is an original stmt in a pattern, we might need to mark its
197 related pattern stmt instead of the original stmt. However, such stmts
198 may have their own uses that are not in any pattern, in such cases the
199 stmt itself should be marked. */
200 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
203 if (!used_in_pattern
)
205 imm_use_iterator imm_iter
;
209 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
210 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
212 if (is_gimple_assign (stmt
))
213 lhs
= gimple_assign_lhs (stmt
);
215 lhs
= gimple_call_lhs (stmt
);
217 /* This use is out of pattern use, if LHS has other uses that are
218 pattern uses, we should mark the stmt itself, and not the pattern
220 if (TREE_CODE (lhs
) == SSA_NAME
)
221 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
223 if (is_gimple_debug (USE_STMT (use_p
)))
225 use_stmt
= USE_STMT (use_p
);
227 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
230 if (vinfo_for_stmt (use_stmt
)
231 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
241 /* This is the last stmt in a sequence that was detected as a
242 pattern that can potentially be vectorized. Don't mark the stmt
243 as relevant/live because it's not going to be vectorized.
244 Instead mark the pattern-stmt that replaces it. */
246 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
248 if (dump_enabled_p ())
249 dump_printf_loc (MSG_NOTE
, vect_location
,
250 "last stmt in pattern. don't mark"
251 " relevant/live.\n");
252 stmt_info
= vinfo_for_stmt (pattern_stmt
);
253 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
254 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
255 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
260 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
261 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
262 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
264 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
265 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
267 if (dump_enabled_p ())
268 dump_printf_loc (MSG_NOTE
, vect_location
,
269 "already marked relevant/live.\n");
273 worklist
->safe_push (stmt
);
277 /* Function vect_stmt_relevant_p.
279 Return true if STMT in loop that is represented by LOOP_VINFO is
280 "relevant for vectorization".
282 A stmt is considered "relevant for vectorization" if:
283 - it has uses outside the loop.
284 - it has vdefs (it alters memory).
285 - control stmts in the loop (except for the exit condition).
287 CHECKME: what other side effects would the vectorizer allow? */
290 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
291 enum vect_relevant
*relevant
, bool *live_p
)
293 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
295 imm_use_iterator imm_iter
;
299 *relevant
= vect_unused_in_scope
;
302 /* cond stmt other than loop exit cond. */
303 if (is_ctrl_stmt (stmt
)
304 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
305 != loop_exit_ctrl_vec_info_type
)
306 *relevant
= vect_used_in_scope
;
308 /* changing memory. */
309 if (gimple_code (stmt
) != GIMPLE_PHI
)
310 if (gimple_vdef (stmt
))
312 if (dump_enabled_p ())
313 dump_printf_loc (MSG_NOTE
, vect_location
,
314 "vec_stmt_relevant_p: stmt has vdefs.\n");
315 *relevant
= vect_used_in_scope
;
318 /* uses outside the loop. */
319 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
321 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
323 basic_block bb
= gimple_bb (USE_STMT (use_p
));
324 if (!flow_bb_inside_loop_p (loop
, bb
))
326 if (dump_enabled_p ())
327 dump_printf_loc (MSG_NOTE
, vect_location
,
328 "vec_stmt_relevant_p: used out of loop.\n");
330 if (is_gimple_debug (USE_STMT (use_p
)))
333 /* We expect all such uses to be in the loop exit phis
334 (because of loop closed form) */
335 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
336 gcc_assert (bb
== single_exit (loop
)->dest
);
343 return (*live_p
|| *relevant
);
347 /* Function exist_non_indexing_operands_for_use_p
349 USE is one of the uses attached to STMT. Check if USE is
350 used in STMT for anything other than indexing an array. */
353 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
356 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
358 /* USE corresponds to some operand in STMT. If there is no data
359 reference in STMT, then any operand that corresponds to USE
360 is not indexing an array. */
361 if (!STMT_VINFO_DATA_REF (stmt_info
))
364 /* STMT has a data_ref. FORNOW this means that its of one of
368 (This should have been verified in analyze_data_refs).
370 'var' in the second case corresponds to a def, not a use,
371 so USE cannot correspond to any operands that are not used
374 Therefore, all we need to check is if STMT falls into the
375 first case, and whether var corresponds to USE. */
377 if (!gimple_assign_copy_p (stmt
))
379 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
381 operand
= gimple_assign_rhs1 (stmt
);
382 if (TREE_CODE (operand
) != SSA_NAME
)
393 Function process_use.
396 - a USE in STMT in a loop represented by LOOP_VINFO
397 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
398 that defined USE. This is done by calling mark_relevant and passing it
399 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
400 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
404 Generally, LIVE_P and RELEVANT are used to define the liveness and
405 relevance info of the DEF_STMT of this USE:
406 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
407 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
409 - case 1: If USE is used only for address computations (e.g. array indexing),
410 which does not need to be directly vectorized, then the liveness/relevance
411 of the respective DEF_STMT is left unchanged.
412 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
413 skip DEF_STMT cause it had already been processed.
414 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
415 be modified accordingly.
417 Return true if everything is as expected. Return false otherwise. */
420 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
421 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
424 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
425 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
426 stmt_vec_info dstmt_vinfo
;
427 basic_block bb
, def_bb
;
430 enum vect_def_type dt
;
432 /* case 1: we are only interested in uses that need to be vectorized. Uses
433 that are used for address computation are not considered relevant. */
434 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
437 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
439 if (dump_enabled_p ())
440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
441 "not vectorized: unsupported use in stmt.\n");
445 if (!def_stmt
|| gimple_nop_p (def_stmt
))
448 def_bb
= gimple_bb (def_stmt
);
449 if (!flow_bb_inside_loop_p (loop
, def_bb
))
451 if (dump_enabled_p ())
452 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
456 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
457 DEF_STMT must have already been processed, because this should be the
458 only way that STMT, which is a reduction-phi, was put in the worklist,
459 as there should be no other uses for DEF_STMT in the loop. So we just
460 check that everything is as expected, and we are done. */
461 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
462 bb
= gimple_bb (stmt
);
463 if (gimple_code (stmt
) == GIMPLE_PHI
464 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
465 && gimple_code (def_stmt
) != GIMPLE_PHI
466 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
467 && bb
->loop_father
== def_bb
->loop_father
)
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_NOTE
, vect_location
,
471 "reduc-stmt defining reduc-phi in the same nest.\n");
472 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
473 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
474 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
475 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
476 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
480 /* case 3a: outer-loop stmt defining an inner-loop stmt:
481 outer-loop-header-bb:
487 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "outer-loop def-stmt defining inner-loop stmt.\n");
495 case vect_unused_in_scope
:
496 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
497 vect_used_in_scope
: vect_unused_in_scope
;
500 case vect_used_in_outer_by_reduction
:
501 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
502 relevant
= vect_used_by_reduction
;
505 case vect_used_in_outer
:
506 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
507 relevant
= vect_used_in_scope
;
510 case vect_used_in_scope
:
518 /* case 3b: inner-loop stmt defining an outer-loop stmt:
519 outer-loop-header-bb:
523 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
525 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
,
529 "inner-loop def-stmt defining outer-loop stmt.\n");
533 case vect_unused_in_scope
:
534 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
535 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
536 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
539 case vect_used_by_reduction
:
540 relevant
= vect_used_in_outer_by_reduction
;
543 case vect_used_in_scope
:
544 relevant
= vect_used_in_outer
;
552 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
553 is_pattern_stmt_p (stmt_vinfo
));
558 /* Function vect_mark_stmts_to_be_vectorized.
560 Not all stmts in the loop need to be vectorized. For example:
569 Stmt 1 and 3 do not need to be vectorized, because loop control and
570 addressing of vectorized data-refs are handled differently.
572 This pass detects such stmts. */
575 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
577 vec
<gimple
> worklist
;
578 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
579 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
580 unsigned int nbbs
= loop
->num_nodes
;
581 gimple_stmt_iterator si
;
584 stmt_vec_info stmt_vinfo
;
588 enum vect_relevant relevant
, tmp_relevant
;
589 enum vect_def_type def_type
;
591 if (dump_enabled_p ())
592 dump_printf_loc (MSG_NOTE
, vect_location
,
593 "=== vect_mark_stmts_to_be_vectorized ===\n");
595 worklist
.create (64);
597 /* 1. Init worklist. */
598 for (i
= 0; i
< nbbs
; i
++)
601 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
604 if (dump_enabled_p ())
606 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
607 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
608 dump_printf (MSG_NOTE
, "\n");
611 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
612 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
614 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
616 stmt
= gsi_stmt (si
);
617 if (dump_enabled_p ())
619 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
620 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
621 dump_printf (MSG_NOTE
, "\n");
624 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
625 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
629 /* 2. Process_worklist */
630 while (worklist
.length () > 0)
635 stmt
= worklist
.pop ();
636 if (dump_enabled_p ())
638 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
639 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
640 dump_printf (MSG_NOTE
, "\n");
643 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
644 (DEF_STMT) as relevant/irrelevant and live/dead according to the
645 liveness and relevance properties of STMT. */
646 stmt_vinfo
= vinfo_for_stmt (stmt
);
647 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
648 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
650 /* Generally, the liveness and relevance properties of STMT are
651 propagated as is to the DEF_STMTs of its USEs:
652 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
653 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
655 One exception is when STMT has been identified as defining a reduction
656 variable; in this case we set the liveness/relevance as follows:
658 relevant = vect_used_by_reduction
659 This is because we distinguish between two kinds of relevant stmts -
660 those that are used by a reduction computation, and those that are
661 (also) used by a regular computation. This allows us later on to
662 identify stmts that are used solely by a reduction, and therefore the
663 order of the results that they produce does not have to be kept. */
665 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
666 tmp_relevant
= relevant
;
669 case vect_reduction_def
:
670 switch (tmp_relevant
)
672 case vect_unused_in_scope
:
673 relevant
= vect_used_by_reduction
;
676 case vect_used_by_reduction
:
677 if (gimple_code (stmt
) == GIMPLE_PHI
)
682 if (dump_enabled_p ())
683 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
684 "unsupported use of reduction.\n");
692 case vect_nested_cycle
:
693 if (tmp_relevant
!= vect_unused_in_scope
694 && tmp_relevant
!= vect_used_in_outer_by_reduction
695 && tmp_relevant
!= vect_used_in_outer
)
697 if (dump_enabled_p ())
698 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
699 "unsupported use of nested cycle.\n");
708 case vect_double_reduction_def
:
709 if (tmp_relevant
!= vect_unused_in_scope
710 && tmp_relevant
!= vect_used_by_reduction
)
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
714 "unsupported use of double reduction.\n");
727 if (is_pattern_stmt_p (stmt_vinfo
))
729 /* Pattern statements are not inserted into the code, so
730 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
731 have to scan the RHS or function arguments instead. */
732 if (is_gimple_assign (stmt
))
734 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
735 tree op
= gimple_assign_rhs1 (stmt
);
738 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
740 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
741 live_p
, relevant
, &worklist
, false)
742 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
743 live_p
, relevant
, &worklist
, false))
750 for (; i
< gimple_num_ops (stmt
); i
++)
752 op
= gimple_op (stmt
, i
);
753 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
761 else if (is_gimple_call (stmt
))
763 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
765 tree arg
= gimple_call_arg (stmt
, i
);
766 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
776 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
778 tree op
= USE_FROM_PTR (use_p
);
779 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
787 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
790 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
792 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
799 } /* while worklist */
806 /* Function vect_model_simple_cost.
808 Models cost for simple operations, i.e. those that only emit ncopies of a
809 single op. Right now, this does not account for multiple insns that could
810 be generated for the single vector op. We will handle that shortly. */
813 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
814 enum vect_def_type
*dt
,
815 stmt_vector_for_cost
*prologue_cost_vec
,
816 stmt_vector_for_cost
*body_cost_vec
)
819 int inside_cost
= 0, prologue_cost
= 0;
821 /* The SLP costs were already calculated during SLP tree build. */
822 if (PURE_SLP_STMT (stmt_info
))
825 /* FORNOW: Assuming maximum 2 args per stmts. */
826 for (i
= 0; i
< 2; i
++)
827 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
828 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
829 stmt_info
, 0, vect_prologue
);
831 /* Pass the inside-of-loop statements to the target-specific cost model. */
832 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
833 stmt_info
, 0, vect_body
);
835 if (dump_enabled_p ())
836 dump_printf_loc (MSG_NOTE
, vect_location
,
837 "vect_model_simple_cost: inside_cost = %d, "
838 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
842 /* Model cost for type demotion and promotion operations. PWR is normally
843 zero for single-step promotions and demotions. It will be one if
844 two-step promotion/demotion is required, and so on. Each additional
845 step doubles the number of instructions required. */
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
849 enum vect_def_type
*dt
, int pwr
)
852 int inside_cost
= 0, prologue_cost
= 0;
853 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
854 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
855 void *target_cost_data
;
857 /* The SLP costs were already calculated during SLP tree build. */
858 if (PURE_SLP_STMT (stmt_info
))
862 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
864 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
866 for (i
= 0; i
< pwr
+ 1; i
++)
868 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
870 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
871 vec_promote_demote
, stmt_info
, 0,
875 /* FORNOW: Assuming maximum 2 args per stmts. */
876 for (i
= 0; i
< 2; i
++)
877 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
878 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
879 stmt_info
, 0, vect_prologue
);
881 if (dump_enabled_p ())
882 dump_printf_loc (MSG_NOTE
, vect_location
,
883 "vect_model_promotion_demotion_cost: inside_cost = %d, "
884 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
887 /* Function vect_cost_group_size
889 For grouped load or store, return the group_size only if it is the first
890 load or store of a group, else return 1. This ensures that group size is
891 only returned once per group. */
894 vect_cost_group_size (stmt_vec_info stmt_info
)
896 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
898 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
899 return GROUP_SIZE (stmt_info
);
905 /* Function vect_model_store_cost
907 Models cost for stores. In the case of grouped accesses, one access
908 has the overhead of the grouped access attributed to it. */
911 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
912 bool store_lanes_p
, enum vect_def_type dt
,
914 stmt_vector_for_cost
*prologue_cost_vec
,
915 stmt_vector_for_cost
*body_cost_vec
)
918 unsigned int inside_cost
= 0, prologue_cost
= 0;
919 struct data_reference
*first_dr
;
922 /* The SLP costs were already calculated during SLP tree build. */
923 if (PURE_SLP_STMT (stmt_info
))
926 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
927 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
928 stmt_info
, 0, vect_prologue
);
930 /* Grouped access? */
931 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
935 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
940 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
941 group_size
= vect_cost_group_size (stmt_info
);
944 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
946 /* Not a grouped access. */
950 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
953 /* We assume that the cost of a single store-lanes instruction is
954 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
955 access is instead being provided by a permute-and-store operation,
956 include the cost of the permutes. */
957 if (!store_lanes_p
&& group_size
> 1)
959 /* Uses a high and low interleave operation for each needed permute. */
961 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
962 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
963 stmt_info
, 0, vect_body
);
965 if (dump_enabled_p ())
966 dump_printf_loc (MSG_NOTE
, vect_location
,
967 "vect_model_store_cost: strided group_size = %d .\n",
971 /* Costs of the stores. */
972 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
974 if (dump_enabled_p ())
975 dump_printf_loc (MSG_NOTE
, vect_location
,
976 "vect_model_store_cost: inside_cost = %d, "
977 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
981 /* Calculate cost of DR's memory access. */
983 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
984 unsigned int *inside_cost
,
985 stmt_vector_for_cost
*body_cost_vec
)
987 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
988 gimple stmt
= DR_STMT (dr
);
989 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
991 switch (alignment_support_scheme
)
995 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
996 vector_store
, stmt_info
, 0,
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE
, vect_location
,
1001 "vect_model_store_cost: aligned.\n");
1005 case dr_unaligned_supported
:
1007 /* Here, we assign an additional cost for the unaligned store. */
1008 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1009 unaligned_store
, stmt_info
,
1010 DR_MISALIGNMENT (dr
), vect_body
);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE
, vect_location
,
1013 "vect_model_store_cost: unaligned supported by "
1018 case dr_unaligned_unsupported
:
1020 *inside_cost
= VECT_MAX_COST
;
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1024 "vect_model_store_cost: unsupported access.\n");
1034 /* Function vect_model_load_cost
1036 Models cost for loads. In the case of grouped accesses, the last access
1037 has the overhead of the grouped access attributed to it. Since unaligned
1038 accesses are supported for loads, we also account for the costs of the
1039 access scheme chosen. */
1042 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1043 bool load_lanes_p
, slp_tree slp_node
,
1044 stmt_vector_for_cost
*prologue_cost_vec
,
1045 stmt_vector_for_cost
*body_cost_vec
)
1049 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1050 unsigned int inside_cost
= 0, prologue_cost
= 0;
1052 /* The SLP costs were already calculated during SLP tree build. */
1053 if (PURE_SLP_STMT (stmt_info
))
1056 /* Grouped accesses? */
1057 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1058 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1060 group_size
= vect_cost_group_size (stmt_info
);
1061 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1063 /* Not a grouped access. */
1070 /* We assume that the cost of a single load-lanes instruction is
1071 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1072 access is instead being provided by a load-and-permute operation,
1073 include the cost of the permutes. */
1074 if (!load_lanes_p
&& group_size
> 1)
1076 /* Uses an even and odd extract operations for each needed permute. */
1077 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1078 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1079 stmt_info
, 0, vect_body
);
1081 if (dump_enabled_p ())
1082 dump_printf_loc (MSG_NOTE
, vect_location
,
1083 "vect_model_load_cost: strided group_size = %d .\n",
1087 /* The loads themselves. */
1088 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1090 /* N scalar loads plus gathering them into a vector. */
1091 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1092 inside_cost
+= record_stmt_cost (body_cost_vec
,
1093 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1094 scalar_load
, stmt_info
, 0, vect_body
);
1095 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1096 stmt_info
, 0, vect_body
);
1099 vect_get_load_cost (first_dr
, ncopies
,
1100 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1101 || group_size
> 1 || slp_node
),
1102 &inside_cost
, &prologue_cost
,
1103 prologue_cost_vec
, body_cost_vec
, true);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE
, vect_location
,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1112 /* Calculate cost of DR's memory access. */
1114 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1115 bool add_realign_cost
, unsigned int *inside_cost
,
1116 unsigned int *prologue_cost
,
1117 stmt_vector_for_cost
*prologue_cost_vec
,
1118 stmt_vector_for_cost
*body_cost_vec
,
1119 bool record_prologue_costs
)
1121 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1122 gimple stmt
= DR_STMT (dr
);
1123 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1125 switch (alignment_support_scheme
)
1129 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1130 stmt_info
, 0, vect_body
);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE
, vect_location
,
1134 "vect_model_load_cost: aligned.\n");
1138 case dr_unaligned_supported
:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1142 unaligned_load
, stmt_info
,
1143 DR_MISALIGNMENT (dr
), vect_body
);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_load_cost: unaligned supported by "
1152 case dr_explicit_realign
:
1154 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1155 vector_load
, stmt_info
, 0, vect_body
);
1156 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1157 vec_perm
, stmt_info
, 0, vect_body
);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1162 if (targetm
.vectorize
.builtin_mask_for_load
)
1163 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1164 stmt_info
, 0, vect_body
);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE
, vect_location
,
1168 "vect_model_load_cost: explicit realign\n");
1172 case dr_explicit_realign_optimized
:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE
, vect_location
,
1176 "vect_model_load_cost: unaligned software "
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost
&& record_prologue_costs
)
1188 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1189 vector_stmt
, stmt_info
,
1191 if (targetm
.vectorize
.builtin_mask_for_load
)
1192 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1193 vector_stmt
, stmt_info
,
1197 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1198 stmt_info
, 0, vect_body
);
1199 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1200 stmt_info
, 0, vect_body
);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE
, vect_location
,
1204 "vect_model_load_cost: explicit realign optimized"
1210 case dr_unaligned_unsupported
:
1212 *inside_cost
= VECT_MAX_COST
;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1216 "vect_model_load_cost: unsupported access.\n");
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1229 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1232 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1235 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1236 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1240 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1244 if (nested_in_vect_loop_p (loop
, stmt
))
1247 pe
= loop_preheader_edge (loop
);
1248 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1249 gcc_assert (!new_bb
);
1253 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1255 gimple_stmt_iterator gsi_bb_start
;
1257 gcc_assert (bb_vinfo
);
1258 bb
= BB_VINFO_BB (bb_vinfo
);
1259 gsi_bb_start
= gsi_after_labels (bb
);
1260 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE
, vect_location
,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1269 dump_printf (MSG_NOTE
, "\n");
1273 /* Function vect_init_vector.
1275 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1276 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1277 vector type a vector with all elements equal to VAL is created first.
1278 Place the initialization at BSI if it is not NULL. Otherwise, place the
1279 initialization at the loop preheader.
1280 Return the DEF of INIT_STMT.
1281 It will be used in the vectorization of STMT. */
1284 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1291 if (TREE_CODE (type
) == VECTOR_TYPE
1292 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1294 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1296 if (CONSTANT_CLASS_P (val
))
1297 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1300 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1301 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1304 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1308 val
= build_vector_from_val (type
, val
);
1311 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1312 init_stmt
= gimple_build_assign (new_var
, val
);
1313 new_temp
= make_ssa_name (new_var
, init_stmt
);
1314 gimple_assign_set_lhs (init_stmt
, new_temp
);
1315 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1316 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1321 /* Function vect_get_vec_def_for_operand.
1323 OP is an operand in STMT. This function returns a (vector) def that will be
1324 used in the vectorized stmt for STMT.
1326 In the case that OP is an SSA_NAME which is defined in the loop, then
1327 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1329 In case OP is an invariant or constant, a new stmt that creates a vector def
1330 needs to be introduced. */
1333 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1338 stmt_vec_info def_stmt_info
= NULL
;
1339 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1340 unsigned int nunits
;
1341 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1343 enum vect_def_type dt
;
1347 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE
, vect_location
,
1350 "vect_get_vec_def_for_operand: ");
1351 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1352 dump_printf (MSG_NOTE
, "\n");
1355 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1356 &def_stmt
, &def
, &dt
);
1357 gcc_assert (is_simple_use
);
1358 if (dump_enabled_p ())
1360 int loc_printed
= 0;
1363 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1365 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1366 dump_printf (MSG_NOTE
, "\n");
1371 dump_printf (MSG_NOTE
, " def_stmt = ");
1373 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1374 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1375 dump_printf (MSG_NOTE
, "\n");
1381 /* Case 1: operand is a constant. */
1382 case vect_constant_def
:
1384 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1385 gcc_assert (vector_type
);
1386 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1391 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1392 if (dump_enabled_p ())
1393 dump_printf_loc (MSG_NOTE
, vect_location
,
1394 "Create vector_cst. nunits = %d\n", nunits
);
1396 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1399 /* Case 2: operand is defined outside the loop - loop invariant. */
1400 case vect_external_def
:
1402 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1403 gcc_assert (vector_type
);
1408 /* Create 'vec_inv = {inv,inv,..,inv}' */
1409 if (dump_enabled_p ())
1410 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1412 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1415 /* Case 3: operand is defined inside the loop. */
1416 case vect_internal_def
:
1419 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1421 /* Get the def from the vectorized stmt. */
1422 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1424 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1425 /* Get vectorized pattern statement. */
1427 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1428 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1429 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1430 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1431 gcc_assert (vec_stmt
);
1432 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1433 vec_oprnd
= PHI_RESULT (vec_stmt
);
1434 else if (is_gimple_call (vec_stmt
))
1435 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1437 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1441 /* Case 4: operand is defined by a loop header phi - reduction */
1442 case vect_reduction_def
:
1443 case vect_double_reduction_def
:
1444 case vect_nested_cycle
:
1448 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1449 loop
= (gimple_bb (def_stmt
))->loop_father
;
1451 /* Get the def before the loop */
1452 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1453 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1456 /* Case 5: operand is defined by loop-header phi - induction. */
1457 case vect_induction_def
:
1459 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1461 /* Get the def from the vectorized stmt. */
1462 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1463 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1464 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1465 vec_oprnd
= PHI_RESULT (vec_stmt
);
1467 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1477 /* Function vect_get_vec_def_for_stmt_copy
1479 Return a vector-def for an operand. This function is used when the
1480 vectorized stmt to be created (by the caller to this function) is a "copy"
1481 created in case the vectorized result cannot fit in one vector, and several
1482 copies of the vector-stmt are required. In this case the vector-def is
1483 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1484 of the stmt that defines VEC_OPRND.
1485 DT is the type of the vector def VEC_OPRND.
1488 In case the vectorization factor (VF) is bigger than the number
1489 of elements that can fit in a vectype (nunits), we have to generate
1490 more than one vector stmt to vectorize the scalar stmt. This situation
1491 arises when there are multiple data-types operated upon in the loop; the
1492 smallest data-type determines the VF, and as a result, when vectorizing
1493 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1494 vector stmt (each computing a vector of 'nunits' results, and together
1495 computing 'VF' results in each iteration). This function is called when
1496 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1497 which VF=16 and nunits=4, so the number of copies required is 4):
1499 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1501 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1502 VS1.1: vx.1 = memref1 VS1.2
1503 VS1.2: vx.2 = memref2 VS1.3
1504 VS1.3: vx.3 = memref3
1506 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1507 VSnew.1: vz1 = vx.1 + ... VSnew.2
1508 VSnew.2: vz2 = vx.2 + ... VSnew.3
1509 VSnew.3: vz3 = vx.3 + ...
1511 The vectorization of S1 is explained in vectorizable_load.
1512 The vectorization of S2:
1513 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1514 the function 'vect_get_vec_def_for_operand' is called to
1515 get the relevant vector-def for each operand of S2. For operand x it
1516 returns the vector-def 'vx.0'.
1518 To create the remaining copies of the vector-stmt (VSnew.j), this
1519 function is called to get the relevant vector-def for each operand. It is
1520 obtained from the respective VS1.j stmt, which is recorded in the
1521 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1523 For example, to obtain the vector-def 'vx.1' in order to create the
1524 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1525 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1526 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1527 and return its def ('vx.1').
1528 Overall, to create the above sequence this function will be called 3 times:
1529 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1530 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1531 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1534 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1536 gimple vec_stmt_for_operand
;
1537 stmt_vec_info def_stmt_info
;
1539 /* Do nothing; can reuse same def. */
1540 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1543 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1544 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1545 gcc_assert (def_stmt_info
);
1546 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1547 gcc_assert (vec_stmt_for_operand
);
1548 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1549 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1550 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1552 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1557 /* Get vectorized definitions for the operands to create a copy of an original
1558 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1561 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1562 vec
<tree
> *vec_oprnds0
,
1563 vec
<tree
> *vec_oprnds1
)
1565 tree vec_oprnd
= vec_oprnds0
->pop ();
1567 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1568 vec_oprnds0
->quick_push (vec_oprnd
);
1570 if (vec_oprnds1
&& vec_oprnds1
->length ())
1572 vec_oprnd
= vec_oprnds1
->pop ();
1573 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1574 vec_oprnds1
->quick_push (vec_oprnd
);
1579 /* Get vectorized definitions for OP0 and OP1.
1580 REDUC_INDEX is the index of reduction operand in case of reduction,
1581 and -1 otherwise. */
1584 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1585 vec
<tree
> *vec_oprnds0
,
1586 vec
<tree
> *vec_oprnds1
,
1587 slp_tree slp_node
, int reduc_index
)
1591 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1594 vec
<vec
<tree
> > vec_defs
;
1595 vec_defs
.create (nops
);
1597 ops
.quick_push (op0
);
1599 ops
.quick_push (op1
);
1601 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1603 *vec_oprnds0
= vec_defs
[0];
1605 *vec_oprnds1
= vec_defs
[1];
1608 vec_defs
.release ();
1614 vec_oprnds0
->create (1);
1615 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1616 vec_oprnds0
->quick_push (vec_oprnd
);
1620 vec_oprnds1
->create (1);
1621 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1622 vec_oprnds1
->quick_push (vec_oprnd
);
1628 /* Function vect_finish_stmt_generation.
1630 Insert a new stmt. */
1633 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1634 gimple_stmt_iterator
*gsi
)
1636 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1637 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1638 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1640 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1642 if (!gsi_end_p (*gsi
)
1643 && gimple_has_mem_ops (vec_stmt
))
1645 gimple at_stmt
= gsi_stmt (*gsi
);
1646 tree vuse
= gimple_vuse (at_stmt
);
1647 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1649 tree vdef
= gimple_vdef (at_stmt
);
1650 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1651 /* If we have an SSA vuse and insert a store, update virtual
1652 SSA form to avoid triggering the renamer. Do so only
1653 if we can easily see all uses - which is what almost always
1654 happens with the way vectorized stmts are inserted. */
1655 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1656 && ((is_gimple_assign (vec_stmt
)
1657 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1658 || (is_gimple_call (vec_stmt
)
1659 && !(gimple_call_flags (vec_stmt
)
1660 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1662 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1663 gimple_set_vdef (vec_stmt
, new_vdef
);
1664 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1668 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1670 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1673 if (dump_enabled_p ())
1675 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1676 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1677 dump_printf (MSG_NOTE
, "\n");
1680 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1683 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1684 a function declaration if the target has a vectorized version
1685 of the function, or NULL_TREE if the function cannot be vectorized. */
1688 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1690 tree fndecl
= gimple_call_fndecl (call
);
1692 /* We only handle functions that do not read or clobber memory -- i.e.
1693 const or novops ones. */
1694 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1698 || TREE_CODE (fndecl
) != FUNCTION_DECL
1699 || !DECL_BUILT_IN (fndecl
))
1702 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1706 /* Function vectorizable_call.
1708 Check if STMT performs a function call that can be vectorized.
1709 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1710 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1714 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1720 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1721 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1722 tree vectype_out
, vectype_in
;
1725 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1726 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1727 tree fndecl
, new_temp
, def
, rhs_type
;
1729 enum vect_def_type dt
[3]
1730 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1731 gimple new_stmt
= NULL
;
1733 vec
<tree
> vargs
= vNULL
;
1734 enum { NARROW
, NONE
, WIDEN
} modifier
;
1738 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1741 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1744 /* Is STMT a vectorizable call? */
1745 if (!is_gimple_call (stmt
))
1748 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1751 if (stmt_can_throw_internal (stmt
))
1754 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1756 /* Process function arguments. */
1757 rhs_type
= NULL_TREE
;
1758 vectype_in
= NULL_TREE
;
1759 nargs
= gimple_call_num_args (stmt
);
1761 /* Bail out if the function has more than three arguments, we do not have
1762 interesting builtin functions to vectorize with more than two arguments
1763 except for fma. No arguments is also not good. */
1764 if (nargs
== 0 || nargs
> 3)
1767 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1768 if (gimple_call_internal_p (stmt
)
1769 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1772 rhs_type
= unsigned_type_node
;
1775 for (i
= 0; i
< nargs
; i
++)
1779 op
= gimple_call_arg (stmt
, i
);
1781 /* We can only handle calls with arguments of the same type. */
1783 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1787 "argument types differ.\n");
1791 rhs_type
= TREE_TYPE (op
);
1793 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1794 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1796 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1798 "use not simple.\n");
1803 vectype_in
= opvectype
;
1805 && opvectype
!= vectype_in
)
1807 if (dump_enabled_p ())
1808 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1809 "argument vector types differ.\n");
1813 /* If all arguments are external or constant defs use a vector type with
1814 the same size as the output vector type. */
1816 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1818 gcc_assert (vectype_in
);
1821 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1824 "no vectype for scalar type ");
1825 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
1826 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1833 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1834 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1835 if (nunits_in
== nunits_out
/ 2)
1837 else if (nunits_out
== nunits_in
)
1839 else if (nunits_out
== nunits_in
/ 2)
1844 /* For now, we only vectorize functions if a target specific builtin
1845 is available. TODO -- in some cases, it might be profitable to
1846 insert the calls for pieces of the vector, in order to be able
1847 to vectorize other operations in the loop. */
1848 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1849 if (fndecl
== NULL_TREE
)
1851 if (gimple_call_internal_p (stmt
)
1852 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
1855 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1856 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
1857 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1858 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
1860 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1861 { 0, 1, 2, ... vf - 1 } vector. */
1862 gcc_assert (nargs
== 0);
1866 if (dump_enabled_p ())
1867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1868 "function is not vectorizable.\n");
1873 gcc_assert (!gimple_vuse (stmt
));
1875 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1877 else if (modifier
== NARROW
)
1878 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1880 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1882 /* Sanity check: make sure that at least one copy of the vectorized stmt
1883 needs to be generated. */
1884 gcc_assert (ncopies
>= 1);
1886 if (!vec_stmt
) /* transformation not required. */
1888 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
1892 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
1902 scalar_dest
= gimple_call_lhs (stmt
);
1903 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1905 prev_stmt_info
= NULL
;
1909 for (j
= 0; j
< ncopies
; ++j
)
1911 /* Build argument list for the vectorized call. */
1913 vargs
.create (nargs
);
1919 vec
<vec
<tree
> > vec_defs
;
1920 vec_defs
.create (nargs
);
1921 vec
<tree
> vec_oprnds0
;
1923 for (i
= 0; i
< nargs
; i
++)
1924 vargs
.quick_push (gimple_call_arg (stmt
, i
));
1925 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1926 vec_oprnds0
= vec_defs
[0];
1928 /* Arguments are ready. Create the new vector stmt. */
1929 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
1932 for (k
= 0; k
< nargs
; k
++)
1934 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
1935 vargs
[k
] = vec_oprndsk
[i
];
1937 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1938 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1939 gimple_call_set_lhs (new_stmt
, new_temp
);
1940 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1941 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
1944 for (i
= 0; i
< nargs
; i
++)
1946 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
1947 vec_oprndsi
.release ();
1949 vec_defs
.release ();
1953 for (i
= 0; i
< nargs
; i
++)
1955 op
= gimple_call_arg (stmt
, i
);
1958 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1961 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1963 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1966 vargs
.quick_push (vec_oprnd0
);
1969 if (gimple_call_internal_p (stmt
)
1970 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1972 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
1974 for (k
= 0; k
< nunits_out
; ++k
)
1975 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
1976 tree cst
= build_vector (vectype_out
, v
);
1978 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
1979 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
1980 new_temp
= make_ssa_name (new_var
, init_stmt
);
1981 gimple_assign_set_lhs (init_stmt
, new_temp
);
1982 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
1983 new_temp
= make_ssa_name (vec_dest
, NULL
);
1984 new_stmt
= gimple_build_assign (new_temp
,
1985 gimple_assign_lhs (init_stmt
));
1989 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1990 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1991 gimple_call_set_lhs (new_stmt
, new_temp
);
1993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1996 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1998 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2000 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2006 for (j
= 0; j
< ncopies
; ++j
)
2008 /* Build argument list for the vectorized call. */
2010 vargs
.create (nargs
* 2);
2016 vec
<vec
<tree
> > vec_defs
;
2017 vec_defs
.create (nargs
);
2018 vec
<tree
> vec_oprnds0
;
2020 for (i
= 0; i
< nargs
; i
++)
2021 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2022 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2023 vec_oprnds0
= vec_defs
[0];
2025 /* Arguments are ready. Create the new vector stmt. */
2026 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2030 for (k
= 0; k
< nargs
; k
++)
2032 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2033 vargs
.quick_push (vec_oprndsk
[i
]);
2034 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2036 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2037 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2038 gimple_call_set_lhs (new_stmt
, new_temp
);
2039 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2040 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2043 for (i
= 0; i
< nargs
; i
++)
2045 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2046 vec_oprndsi
.release ();
2048 vec_defs
.release ();
2052 for (i
= 0; i
< nargs
; i
++)
2054 op
= gimple_call_arg (stmt
, i
);
2058 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2060 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2064 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2066 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2068 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2071 vargs
.quick_push (vec_oprnd0
);
2072 vargs
.quick_push (vec_oprnd1
);
2075 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2076 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2077 gimple_call_set_lhs (new_stmt
, new_temp
);
2078 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2081 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2083 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2085 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2088 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
      /* No current target implements this case.  */

  /* Update the exception handling table with the vector stmt if necessary.  */
2100 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2101 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
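  /* For illustration only (the names and types below are hypothetical):
     after the vectorized call has been emitted, a scalar call such as

         x_1 = sqrtf (a_2);

     is left behind as the harmless

         x_1 = 0.0;

     so that the SSA name x_1 still has a defining statement; dead code
     elimination can then remove it once all scalar uses have been
     replaced by the vector code.  */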
2111 type
= TREE_TYPE (scalar_dest
);
2112 if (is_pattern_stmt_p (stmt_info
))
2113 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2115 lhs
= gimple_call_lhs (stmt
);
2116 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2117 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2118 set_vinfo_for_stmt (stmt
, NULL
);
2119 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2120 gsi_replace (gsi
, new_stmt
, false);
2121 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
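/* As an illustrative sketch (assuming V8HI operands widened to V4SI
   results; names are hypothetical): two calls to this function, one per
   half, could produce

       vlo_1 = VEC_WIDEN_MULT_LO_EXPR <va_2, vb_3>;
       vhi_4 = VEC_WIDEN_MULT_HI_EXPR <va_2, vb_3>;

   where CODE is the LO code for the first call and the HI code for the
   second, or a target builtin call when CODE is CALL_EXPR.  */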
2137 vect_gen_widened_results_half (enum tree_code code
,
2139 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2140 tree vec_dest
, gimple_stmt_iterator
*gsi
,
  /* Generate half of the widened result:  */
2147 if (code
== CALL_EXPR
)
      /* Target specific support  */
2150 if (op_type
== binary_op
)
2151 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2153 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2154 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2155 gimple_call_set_lhs (new_stmt
, new_temp
);
      /* Generic support */
2160 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2161 if (op_type
!= binary_op
)
2163 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2165 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2166 gimple_assign_set_lhs (new_stmt
, new_temp
);
2168 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
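/* As a sketch of the behaviour: with MULTI_STEP_CVT == 1 the call chain
   pushes four vector defs into VEC_OPRNDS; the first one comes from
   vect_get_vec_def_for_operand (while OPRND is still the scalar operand),
   and each following one is obtained from its predecessor with
   vect_get_vec_def_for_stmt_copy.  */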
static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
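/* As an illustrative sketch (assuming two V4SI operands being narrowed
   into one V8HI result; names are hypothetical): each pair of input
   vectors is combined as

       vdest_1 = VEC_PACK_TRUNC_EXPR <vop0_2, vop1_3>;

   and for a multi-step conversion the results are written back into
   VEC_OPRNDS and packed again by the next recursive call.  */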
2215 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
2216 int multi_step_cvt
, gimple stmt
,
2218 gimple_stmt_iterator
*gsi
,
2219 slp_tree slp_node
, enum tree_code code
,
2220 stmt_vec_info
*prev_stmt_info
)
2223 tree vop0
, vop1
, new_tmp
, vec_dest
;
2225 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2227 vec_dest
= vec_dsts
.pop ();
2229 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
      /* Create demotion operation.  */
2232 vop0
= (*vec_oprnds
)[i
];
2233 vop1
= (*vec_oprnds
)[i
+ 1];
2234 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2235 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2236 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2237 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
	/* Store the resulting vector for next recursive call.  */
2241 (*vec_oprnds
)[i
/2] = new_tmp
;
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
2248 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2251 if (!*prev_stmt_info
)
2252 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2254 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2256 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */

      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
2269 vec_oprnds
->truncate ((i
+1)/2);
2270 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2271 stmt
, vec_dsts
, gsi
, slp_node
,
2272 VEC_PACK_TRUNC_EXPR
,
2276 vec_dsts
.quick_push (vec_dest
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */
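/* As an illustrative sketch (assuming V8HI operands promoted to V4SI
   results; names are hypothetical): every element of VEC_OPRNDS0 yields
   two results,

       vlo_1 = <widen-lo CODE1> (vop0, vop1);
       vhi_2 = <widen-hi CODE2> (vop0, vop1);

   both of which are pushed into the temporary vector that replaces
   *VEC_OPRNDS0, so after the call it holds twice as many defs.  */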
2285 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
2286 vec
<tree
> *vec_oprnds1
,
2287 gimple stmt
, tree vec_dest
,
2288 gimple_stmt_iterator
*gsi
,
2289 enum tree_code code1
,
2290 enum tree_code code2
, tree decl1
,
2291 tree decl2
, int op_type
)
2294 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2295 gimple new_stmt1
, new_stmt2
;
2296 vec
<tree
> vec_tmp
= vNULL
;
2298 vec_tmp
.create (vec_oprnds0
->length () * 2);
2299 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
2301 if (op_type
== binary_op
)
2302 vop1
= (*vec_oprnds1
)[i
];
      /* Generate the two halves of promotion operation.  */
2307 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2308 op_type
, vec_dest
, gsi
, stmt
);
2309 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2310 op_type
, vec_dest
, gsi
, stmt
);
2311 if (is_gimple_call (new_stmt1
))
2313 new_tmp1
= gimple_call_lhs (new_stmt1
);
2314 new_tmp2
= gimple_call_lhs (new_stmt2
);
2318 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2319 new_tmp2
= gimple_assign_lhs (new_stmt2
);
      /* Store the results for the next step.  */
2323 vec_tmp
.quick_push (new_tmp1
);
2324 vec_tmp
.quick_push (new_tmp2
);
2327 vec_oprnds0
->release ();
2328 *vec_oprnds0
= vec_tmp
;
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
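/* For example (illustrative only, assuming a target without a direct
   pattern): a FLOAT_EXPR from a vector of shorts to a vector of doubles
   can be handled as a multi-step conversion - first widen the shorts to
   an intermediate integer type whose size matches the float type, then
   convert that integer vector to double.  The intermediate vector types
   are collected in INTERM_TYPES below and MULTI_STEP_CVT counts the
   extra steps.  */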
2338 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2339 gimple
*vec_stmt
, slp_tree slp_node
)
2343 tree op0
, op1
= NULL_TREE
;
2344 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2345 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2346 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2347 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2348 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2349 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2353 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2354 gimple new_stmt
= NULL
;
2355 stmt_vec_info prev_stmt_info
;
2358 tree vectype_out
, vectype_in
;
2360 tree lhs_type
, rhs_type
;
2361 enum { NARROW
, NONE
, WIDEN
} modifier
;
2362 vec
<tree
> vec_oprnds0
= vNULL
;
2363 vec
<tree
> vec_oprnds1
= vNULL
;
2365 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2366 int multi_step_cvt
= 0;
2367 vec
<tree
> vec_dsts
= vNULL
;
2368 vec
<tree
> interm_types
= vNULL
;
2369 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2371 enum machine_mode rhs_mode
;
2372 unsigned short fltsz
;
  /* Is STMT a vectorizable conversion?  */
2376 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2379 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2382 if (!is_gimple_assign (stmt
))
2385 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2388 code
= gimple_assign_rhs_code (stmt
);
2389 if (!CONVERT_EXPR_CODE_P (code
)
2390 && code
!= FIX_TRUNC_EXPR
2391 && code
!= FLOAT_EXPR
2392 && code
!= WIDEN_MULT_EXPR
2393 && code
!= WIDEN_LSHIFT_EXPR
)
2396 op_type
= TREE_CODE_LENGTH (code
);
  /* Check types of lhs and rhs.  */
2399 scalar_dest
= gimple_assign_lhs (stmt
);
2400 lhs_type
= TREE_TYPE (scalar_dest
);
2401 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2403 op0
= gimple_assign_rhs1 (stmt
);
2404 rhs_type
= TREE_TYPE (op0
);
2406 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2407 && !((INTEGRAL_TYPE_P (lhs_type
)
2408 && INTEGRAL_TYPE_P (rhs_type
))
2409 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2410 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2413 if ((INTEGRAL_TYPE_P (lhs_type
)
2414 && (TYPE_PRECISION (lhs_type
)
2415 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2416 || (INTEGRAL_TYPE_P (rhs_type
)
2417 && (TYPE_PRECISION (rhs_type
)
2418 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2420 if (dump_enabled_p ())
2421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2422 "type conversion to/from bit-precision unsupported."
  /* Check the operands of the operation.  */
2428 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2429 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2433 "use not simple.\n");
2436 if (op_type
== binary_op
)
2440 op1
= gimple_assign_rhs2 (stmt
);
2441 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of OP1.  */
2444 if (CONSTANT_CLASS_P (op0
))
2445 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2446 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2448 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2455 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
2463 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2465 gcc_assert (vectype_in
);
2468 if (dump_enabled_p ())
2470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2471 "no vectype for scalar type ");
2472 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2473 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2479 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2480 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2481 if (nunits_in
< nunits_out
)
2483 else if (nunits_out
== nunits_in
)
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2491 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2493 else if (modifier
== NARROW
)
2494 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2496 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
2500 gcc_assert (ncopies
>= 1);
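  /* Worked example (illustrative types only): with a vectorization factor
     of 16, a widening conversion with a V8HI input vectype
     (nunits_in = 8) needs ncopies = 16 / 8 = 2 copies, and a narrowing
     conversion with a V8HI output vectype (nunits_out = 8) likewise needs
     16 / 8 = 2; in the SLP case ncopies is simply 1.  */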
  /* Supportable by target?  */
2506 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2508 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2513 if (dump_enabled_p ())
2514 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2515 "conversion not supported by target.\n");
2519 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2520 &code1
, &code2
, &multi_step_cvt
,
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
2525 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2529 if (code
!= FLOAT_EXPR
2530 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2531 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2534 rhs_mode
= TYPE_MODE (rhs_type
);
2535 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2536 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2537 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2538 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2541 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2542 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2543 if (cvt_type
== NULL_TREE
)
2546 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2548 if (!supportable_convert_operation (code
, vectype_out
,
2549 cvt_type
, &decl1
, &codecvt1
))
2552 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2553 cvt_type
, &codecvt1
,
2554 &codecvt2
, &multi_step_cvt
,
2558 gcc_assert (multi_step_cvt
== 0);
2560 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2561 vectype_in
, &code1
, &code2
,
2562 &multi_step_cvt
, &interm_types
))
2566 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2569 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2570 codecvt2
= ERROR_MARK
;
2574 interm_types
.safe_push (cvt_type
);
2575 cvt_type
= NULL_TREE
;
2580 gcc_assert (op_type
== unary_op
);
2581 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2582 &code1
, &multi_step_cvt
,
2586 if (code
!= FIX_TRUNC_EXPR
2587 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2588 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2591 rhs_mode
= TYPE_MODE (rhs_type
);
2593 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2594 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2595 if (cvt_type
== NULL_TREE
)
2597 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2600 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2601 &code1
, &multi_step_cvt
,
2610 if (!vec_stmt
) /* transformation not required. */
2612 if (dump_enabled_p ())
2613 dump_printf_loc (MSG_NOTE
, vect_location
,
2614 "=== vectorizable_conversion ===\n");
2615 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2617 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2618 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2620 else if (modifier
== NARROW
)
2622 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2623 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2627 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2628 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2630 interm_types
.release ();
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_NOTE
, vect_location
,
2637 "transform conversion. ncopies = %d.\n", ncopies
);
2639 if (op_type
== binary_op
)
2641 if (CONSTANT_CLASS_P (op0
))
2642 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2643 else if (CONSTANT_CLASS_P (op1
))
2644 op1
= fold_convert (TREE_TYPE (op0
), op1
);
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
2652 vec_dsts
.create (multi_step_cvt
+ 1);
2653 vec_dest
= vect_create_destination_var (scalar_dest
,
2654 (cvt_type
&& modifier
== WIDEN
)
2655 ? cvt_type
: vectype_out
);
2656 vec_dsts
.quick_push (vec_dest
);
2660 for (i
= interm_types
.length () - 1;
2661 interm_types
.iterate (i
, &intermediate_type
); i
--)
2663 vec_dest
= vect_create_destination_var (scalar_dest
,
2665 vec_dsts
.quick_push (vec_dest
);
2670 vec_dest
= vect_create_destination_var (scalar_dest
,
2672 ? vectype_out
: cvt_type
);
2676 if (modifier
== WIDEN
)
2678 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
2679 if (op_type
== binary_op
)
2680 vec_oprnds1
.create (1);
2682 else if (modifier
== NARROW
)
2683 vec_oprnds0
.create (
2684 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
2686 else if (code
== WIDEN_LSHIFT_EXPR
)
2687 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
2690 prev_stmt_info
= NULL
;
2694 for (j
= 0; j
< ncopies
; j
++)
2697 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2700 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2702 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
	  /* Arguments are ready, create the new vector stmt.  */
2705 if (code1
== CALL_EXPR
)
2707 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2708 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2709 gimple_call_set_lhs (new_stmt
, new_temp
);
2713 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2714 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2716 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2717 gimple_assign_set_lhs (new_stmt
, new_temp
);
2720 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2722 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2726 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2728 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2729 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
2738 for (j
= 0; j
< ncopies
; j
++)
2745 if (code
== WIDEN_LSHIFT_EXPR
)
		  /* Store vec_oprnd1 for every vector stmt to be created
		     for SLP_NODE.  We check during the analysis that all
		     the shift arguments are the same.  */
2753 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2754 vec_oprnds1
.quick_push (vec_oprnd1
);
2756 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2760 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2761 &vec_oprnds1
, slp_node
, -1);
2765 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2766 vec_oprnds0
.quick_push (vec_oprnd0
);
2767 if (op_type
== binary_op
)
2769 if (code
== WIDEN_LSHIFT_EXPR
)
2772 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2774 vec_oprnds1
.quick_push (vec_oprnd1
);
2780 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2781 vec_oprnds0
.truncate (0);
2782 vec_oprnds0
.quick_push (vec_oprnd0
);
2783 if (op_type
== binary_op
)
2785 if (code
== WIDEN_LSHIFT_EXPR
)
2788 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2790 vec_oprnds1
.truncate (0);
2791 vec_oprnds1
.quick_push (vec_oprnd1
);
	  /* Arguments are ready.  Create the new vector stmts.  */
2796 for (i
= multi_step_cvt
; i
>= 0; i
--)
2798 tree this_dest
= vec_dsts
[i
];
2799 enum tree_code c1
= code1
, c2
= code2
;
2800 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2805 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2807 stmt
, this_dest
, gsi
,
2808 c1
, c2
, decl1
, decl2
,
2812 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2816 if (codecvt1
== CALL_EXPR
)
2818 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2819 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2820 gimple_call_set_lhs (new_stmt
, new_temp
);
2824 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2825 new_temp
= make_ssa_name (vec_dest
, NULL
);
2826 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2831 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2834 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2837 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2840 if (!prev_stmt_info
)
2841 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2843 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2844 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2849 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
2857 for (j
= 0; j
< ncopies
; j
++)
2861 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2865 vec_oprnds0
.truncate (0);
2866 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2867 vect_pow2 (multi_step_cvt
) - 1);
	  /* Arguments are ready.  Create the new vector stmts.  */
2872 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2874 if (codecvt1
== CALL_EXPR
)
2876 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2877 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2878 gimple_call_set_lhs (new_stmt
, new_temp
);
2882 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2883 new_temp
= make_ssa_name (vec_dest
, NULL
);
2884 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2888 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2889 vec_oprnds0
[i
] = new_temp
;
2892 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2893 stmt
, vec_dsts
, gsi
,
2898 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2902 vec_oprnds0
.release ();
2903 vec_oprnds1
.release ();
2904 vec_dsts
.release ();
2905 interm_types
.release ();
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2919 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2920 gimple
*vec_stmt
, slp_tree slp_node
)
2925 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2926 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2927 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2931 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2932 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2935 vec
<tree
> vec_oprnds
= vNULL
;
2937 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2938 gimple new_stmt
= NULL
;
2939 stmt_vec_info prev_stmt_info
= NULL
;
2940 enum tree_code code
;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2946 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2949 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2951 gcc_assert (ncopies
>= 1);
2953 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2956 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
  /* Is vectorizable assignment?  */
2960 if (!is_gimple_assign (stmt
))
2963 scalar_dest
= gimple_assign_lhs (stmt
);
2964 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2967 code
= gimple_assign_rhs_code (stmt
);
2968 if (gimple_assign_single_p (stmt
)
2969 || code
== PAREN_EXPR
2970 || CONVERT_EXPR_CODE_P (code
))
2971 op
= gimple_assign_rhs1 (stmt
);
2975 if (code
== VIEW_CONVERT_EXPR
)
2976 op
= TREE_OPERAND (op
, 0);
2978 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2979 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2981 if (dump_enabled_p ())
2982 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2983 "use not simple.\n");
  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
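  /* For example (illustrative only): a cast between a vector of int and a
     vector of unsigned int (same number of elements, same total vector
     size) is treated as a plain copy here, realized below through a
     VIEW_CONVERT_EXPR; a cast that changes the element count or the
     vector size is rejected.  */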
2989 if ((CONVERT_EXPR_CODE_P (code
)
2990 || code
== VIEW_CONVERT_EXPR
)
2992 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2993 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2994 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
  /* We do not handle bit-precision changes.  */
2998 if ((CONVERT_EXPR_CODE_P (code
)
2999 || code
== VIEW_CONVERT_EXPR
)
3000 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
3001 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3002 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3003 || ((TYPE_PRECISION (TREE_TYPE (op
))
3004 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
3006 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3007 > TYPE_PRECISION (TREE_TYPE (op
)))
3008 && TYPE_UNSIGNED (TREE_TYPE (op
))))
3010 if (dump_enabled_p ())
3011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3012 "type conversion to/from bit-precision "
3017 if (!vec_stmt
) /* transformation not required. */
3019 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
3020 if (dump_enabled_p ())
3021 dump_printf_loc (MSG_NOTE
, vect_location
,
3022 "=== vectorizable_assignment ===\n");
3023 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3028 if (dump_enabled_p ())
3029 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
3032 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3035 for (j
= 0; j
< ncopies
; j
++)
3039 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
3041 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
      /* Arguments are ready.  Create the new vector stmt.  */
3044 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3046 if (CONVERT_EXPR_CODE_P (code
)
3047 || code
== VIEW_CONVERT_EXPR
)
3048 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
3049 new_stmt
= gimple_build_assign (vec_dest
, vop
);
3050 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3051 gimple_assign_set_lhs (new_stmt
, new_temp
);
3052 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3054 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3061 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3063 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3065 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3068 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
3077 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
3080 enum machine_mode vec_mode
;
3085 vectype
= get_vectype_for_scalar_type (scalar_type
);
3089 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3091 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
3093 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3095 || (optab_handler (optab
, TYPE_MODE (vectype
))
3096 == CODE_FOR_nothing
))
3100 vec_mode
= TYPE_MODE (vectype
);
3101 icode
= (int) optab_handler (optab
, vec_mode
);
3102 if (icode
== CODE_FOR_nothing
)
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3117 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3118 gimple
*vec_stmt
, slp_tree slp_node
)
3122 tree op0
, op1
= NULL
;
3123 tree vec_oprnd1
= NULL_TREE
;
3124 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3126 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3127 enum tree_code code
;
3128 enum machine_mode vec_mode
;
3132 enum machine_mode optab_op2_mode
;
3135 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3136 gimple new_stmt
= NULL
;
3137 stmt_vec_info prev_stmt_info
;
3144 vec
<tree
> vec_oprnds0
= vNULL
;
3145 vec
<tree
> vec_oprnds1
= vNULL
;
3148 bool scalar_shift_arg
= true;
3149 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3152 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3155 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
  /* Is STMT a vectorizable binary/unary operation?   */
3159 if (!is_gimple_assign (stmt
))
3162 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3165 code
= gimple_assign_rhs_code (stmt
);
3167 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3168 || code
== RROTATE_EXPR
))
3171 scalar_dest
= gimple_assign_lhs (stmt
);
3172 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3173 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3174 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3176 if (dump_enabled_p ())
3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3178 "bit-precision shifts not supported.\n");
3182 op0
= gimple_assign_rhs1 (stmt
);
3183 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3184 &def_stmt
, &def
, &dt
[0], &vectype
))
3186 if (dump_enabled_p ())
3187 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3188 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
3194 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3196 gcc_assert (vectype
);
3199 if (dump_enabled_p ())
3200 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3201 "no vectype for scalar type\n");
3205 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3206 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3207 if (nunits_out
!= nunits_in
)
3210 op1
= gimple_assign_rhs2 (stmt
);
3211 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3212 &def
, &dt
[1], &op1_vectype
))
3214 if (dump_enabled_p ())
3215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3216 "use not simple.\n");
3221 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3228 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3231 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3233 gcc_assert (ncopies
>= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
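  /* For example (illustrative only): in

         for (i = 0; i < n; i++) a[i] = b[i] << 3;

     the shift amount is invariant, so the vector/scalar shift optab can
     be used, whereas in

         for (i = 0; i < n; i++) a[i] = b[i] << c[i];

     the amount is a loop-varying (vect_internal_def) operand and the
     vector/vector shift optab is required.  */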
3238 if (dt
[1] == vect_internal_def
&& !slp_node
)
3239 scalar_shift_arg
= false;
3240 else if (dt
[1] == vect_constant_def
3241 || dt
[1] == vect_external_def
3242 || dt
[1] == vect_internal_def
)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
3249 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3252 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
3253 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3254 scalar_shift_arg
= false;
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3261 "operand mode requires invariant argument.\n");
  /* Vector shifted by vector.  */
3266 if (!scalar_shift_arg
)
3268 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3269 if (dump_enabled_p ())
3270 dump_printf_loc (MSG_NOTE
, vect_location
,
3271 "vector/vector shift/rotate found.\n");
3274 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3275 if (op1_vectype
== NULL_TREE
3276 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3280 "unusable type for last operand in"
3281 " vector/vector shift/rotate.\n");
      /* See if the machine has a vector shifted by scalar insn and if not
	 then see if it has a vector shifted by vector insn.  */
3289 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3291 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE
, vect_location
,
3295 "vector/scalar shift/rotate found.\n");
3299 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3301 && (optab_handler (optab
, TYPE_MODE (vectype
))
3302 != CODE_FOR_nothing
))
3304 scalar_shift_arg
= false;
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_NOTE
, vect_location
,
3308 "vector/vector shift/rotate found.\n");
	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
3314 if (dt
[1] == vect_constant_def
)
3315 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3316 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3320 && TYPE_MODE (TREE_TYPE (vectype
))
3321 != TYPE_MODE (TREE_TYPE (op1
)))
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3325 "unusable type for last operand in"
3326 " vector/vector shift/rotate.\n");
3329 if (vec_stmt
&& !slp_node
)
3331 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3332 op1
= vect_init_vector (stmt
, op1
,
3333 TREE_TYPE (vectype
), NULL
);
  /* Supportable by target?  */
3343 if (dump_enabled_p ())
3344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3348 vec_mode
= TYPE_MODE (vectype
);
3349 icode
= (int) optab_handler (optab
, vec_mode
);
3350 if (icode
== CODE_FOR_nothing
)
3352 if (dump_enabled_p ())
3353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3354 "op not supported by target.\n");
      /* Check only during analysis.  */
3356 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3357 || (vf
< vect_min_worthwhile_factor (code
)
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_NOTE
, vect_location
,
3362 "proceeding using word mode.\n");
  /* Worthwhile without SIMD support?  Check only during analysis.  */
3366 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3367 && vf
< vect_min_worthwhile_factor (code
)
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3372 "not worthwhile without SIMD support.\n");
3376 if (!vec_stmt
) /* transformation not required. */
3378 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3379 if (dump_enabled_p ())
3380 dump_printf_loc (MSG_NOTE
, vect_location
,
3381 "=== vectorizable_shift ===\n");
3382 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3388 if (dump_enabled_p ())
3389 dump_printf_loc (MSG_NOTE
, vect_location
,
3390 "transform binary/unary operation.\n");
3393 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3395 prev_stmt_info
= NULL
;
3396 for (j
= 0; j
< ncopies
; j
++)
3401 if (scalar_shift_arg
)
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
3407 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3408 if (!VECTOR_MODE_P (optab_op2_mode
))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_NOTE
, vect_location
,
3412 "operand 1 using scalar mode.\n");
3414 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
3415 vec_oprnds1
.quick_push (vec_oprnd1
);
		/* Store vec_oprnd1 for every vector stmt to be created
		   for SLP_NODE.  We check during the analysis that all
		   the shift arguments are the same.
		   TODO: Allow different constants for different vector
		   stmts generated for an SLP instance.  */
3423 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3424 vec_oprnds1
.quick_push (vec_oprnd1
);
      /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	 (a special case for certain kind of vector shifts); otherwise,
	 operand 1 should be of a vector type (the usual case).  */
3433 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3436 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3440 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
      /* Arguments are ready.  Create the new vector stmt.  */
3443 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3445 vop1
= vec_oprnds1
[i
];
3446 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3447 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3448 gimple_assign_set_lhs (new_stmt
, new_temp
);
3449 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3451 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3458 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3460 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3461 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3464 vec_oprnds0
.release ();
3465 vec_oprnds1
.release ();
3471 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
3472 gimple_stmt_iterator
*);
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
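/* For example (illustrative names only): a scalar statement z = x + y in
   the loop body becomes a vector statement vz_1 = vx_2 + vy_3, one per
   copy (see the unrolling discussion further below); ternary codes take
   a third vectorized operand in the same way.  */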
3484 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3485 gimple
*vec_stmt
, slp_tree slp_node
)
3489 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3490 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3492 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3493 enum tree_code code
;
3494 enum machine_mode vec_mode
;
3501 enum vect_def_type dt
[3]
3502 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3503 gimple new_stmt
= NULL
;
3504 stmt_vec_info prev_stmt_info
;
3510 vec
<tree
> vec_oprnds0
= vNULL
;
3511 vec
<tree
> vec_oprnds1
= vNULL
;
3512 vec
<tree
> vec_oprnds2
= vNULL
;
3513 tree vop0
, vop1
, vop2
;
3514 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3517 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3520 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
  /* Is STMT a vectorizable binary/unary operation?   */
3524 if (!is_gimple_assign (stmt
))
3527 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3530 code
= gimple_assign_rhs_code (stmt
);
  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
3534 if (code
== POINTER_PLUS_EXPR
)
  /* Support only unary, binary and ternary operations.  */
3538 op_type
= TREE_CODE_LENGTH (code
);
3539 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3541 if (dump_enabled_p ())
3542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3543 "num. args = %d (not unary/binary/ternary op).\n",
3548 scalar_dest
= gimple_assign_lhs (stmt
);
3549 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
3553 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3554 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
      /* Exceptions are bitwise binary operations.  */
3556 && code
!= BIT_IOR_EXPR
3557 && code
!= BIT_XOR_EXPR
3558 && code
!= BIT_AND_EXPR
)
3560 if (dump_enabled_p ())
3561 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3562 "bit-precision arithmetic not supported.\n");
3566 op0
= gimple_assign_rhs1 (stmt
);
3567 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3568 &def_stmt
, &def
, &dt
[0], &vectype
))
3570 if (dump_enabled_p ())
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3572 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
3578 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3580 gcc_assert (vectype
);
3583 if (dump_enabled_p ())
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3586 "no vectype for scalar type ");
3587 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3589 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3595 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3596 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3597 if (nunits_out
!= nunits_in
)
3600 if (op_type
== binary_op
|| op_type
== ternary_op
)
3602 op1
= gimple_assign_rhs2 (stmt
);
3603 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3606 if (dump_enabled_p ())
3607 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3608 "use not simple.\n");
3612 if (op_type
== ternary_op
)
3614 op2
= gimple_assign_rhs3 (stmt
);
3615 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3618 if (dump_enabled_p ())
3619 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3620 "use not simple.\n");
3626 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3633 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3636 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3638 gcc_assert (ncopies
>= 1);
  /* Shifts are handled in vectorizable_shift ().  */
3641 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3642 || code
== RROTATE_EXPR
)
  /* Supportable by target?  */
3647 vec_mode
= TYPE_MODE (vectype
);
3648 if (code
== MULT_HIGHPART_EXPR
)
3650 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3651 icode
= LAST_INSN_CODE
;
3653 icode
= CODE_FOR_nothing
;
3657 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3660 if (dump_enabled_p ())
3661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3665 icode
= (int) optab_handler (optab
, vec_mode
);
3668 if (icode
== CODE_FOR_nothing
)
3670 if (dump_enabled_p ())
3671 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3672 "op not supported by target.\n");
      /* Check only during analysis.  */
3674 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3675 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3677 if (dump_enabled_p ())
3678 dump_printf_loc (MSG_NOTE
, vect_location
,
3679 "proceeding using word mode.\n");
  /* Worthwhile without SIMD support?  Check only during analysis.  */
3683 if (!VECTOR_MODE_P (vec_mode
)
3685 && vf
< vect_min_worthwhile_factor (code
))
3687 if (dump_enabled_p ())
3688 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3689 "not worthwhile without SIMD support.\n");
3693 if (!vec_stmt
) /* transformation not required. */
3695 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3696 if (dump_enabled_p ())
3697 dump_printf_loc (MSG_NOTE
, vect_location
,
3698 "=== vectorizable_operation ===\n");
3699 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3705 if (dump_enabled_p ())
3706 dump_printf_loc (MSG_NOTE
, vect_location
,
3707 "transform binary/unary operation.\n");
3710 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
3765 prev_stmt_info
= NULL
;
3766 for (j
= 0; j
< ncopies
; j
++)
3771 if (op_type
== binary_op
|| op_type
== ternary_op
)
3772 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3775 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3777 if (op_type
== ternary_op
)
3779 vec_oprnds2
.create (1);
3780 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
3787 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3788 if (op_type
== ternary_op
)
3790 tree vec_oprnd
= vec_oprnds2
.pop ();
3791 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
      /* Arguments are ready.  Create the new vector stmt.  */
3797 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3799 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3800 ? vec_oprnds1
[i
] : NULL_TREE
);
3801 vop2
= ((op_type
== ternary_op
)
3802 ? vec_oprnds2
[i
] : NULL_TREE
);
3803 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
3805 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3806 gimple_assign_set_lhs (new_stmt
, new_temp
);
3807 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3809 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3816 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3818 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3819 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3822 vec_oprnds0
.release ();
3823 vec_oprnds1
.release ();
3824 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (((dataref_aux *) dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *) dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *) dr->aux)->base_misaligned = false;
    }
}
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
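/* For example (illustrative only): a scalar store a[i] = x_1 inside the
   loop is replaced by one vector store per copy, roughly
   MEM[(vectype *)&a + offset] = vx_2; grouped (interleaved) stores
   additionally go through vect_permute_store_chain, as described before
   the transformation loop below.  */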
3859 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3865 tree vec_oprnd
= NULL_TREE
;
3866 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3867 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3868 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3870 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3871 struct loop
*loop
= NULL
;
3872 enum machine_mode vec_mode
;
3874 enum dr_alignment_support alignment_support_scheme
;
3877 enum vect_def_type dt
;
3878 stmt_vec_info prev_stmt_info
= NULL
;
3879 tree dataref_ptr
= NULL_TREE
;
3880 tree dataref_offset
= NULL_TREE
;
3881 gimple ptr_incr
= NULL
;
3882 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3885 gimple next_stmt
, first_stmt
= NULL
;
3886 bool grouped_store
= false;
3887 bool store_lanes_p
= false;
3888 unsigned int group_size
, i
;
3889 vec
<tree
> dr_chain
= vNULL
;
3890 vec
<tree
> oprnds
= vNULL
;
3891 vec
<tree
> result_chain
= vNULL
;
3893 vec
<tree
> vec_oprnds
= vNULL
;
3894 bool slp
= (slp_node
!= NULL
);
3895 unsigned int vec_num
;
3896 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3900 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3905 if (slp
|| PURE_SLP_STMT (stmt_info
))
3908 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3910 gcc_assert (ncopies
>= 1);
  /* FORNOW. This restriction should be relaxed.  */
3913 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3915 if (dump_enabled_p ())
3916 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3917 "multiple types in nested loop.\n");
3921 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3924 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
  /* Is vectorizable store? */
3929 if (!is_gimple_assign (stmt
))
3932 scalar_dest
= gimple_assign_lhs (stmt
);
3933 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3934 && is_pattern_stmt_p (stmt_info
))
3935 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3936 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3937 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
3938 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3939 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3940 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3941 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3942 && TREE_CODE (scalar_dest
) != MEM_REF
)
3945 gcc_assert (gimple_assign_single_p (stmt
));
3946 op
= gimple_assign_rhs1 (stmt
);
3947 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3950 if (dump_enabled_p ())
3951 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3952 "use not simple.\n");
3956 elem_type
= TREE_TYPE (vectype
);
3957 vec_mode
= TYPE_MODE (vectype
);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
3961 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3964 if (!STMT_VINFO_DATA_REF (stmt_info
))
3967 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3968 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3969 size_zero_node
) < 0)
3971 if (dump_enabled_p ())
3972 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3973 "negative step for store.\n");
3977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3979 grouped_store
= true;
3980 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3981 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3983 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3984 if (vect_store_lanes_supported (vectype
, group_size
))
3985 store_lanes_p
= true;
3986 else if (!vect_grouped_store_supported (vectype
, group_size
))
3990 if (first_stmt
== stmt
)
	  /* STMT is the leader of the group.  Check the operands of all the
	     stmts of the group.  */
3994 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3997 gcc_assert (gimple_assign_single_p (next_stmt
));
3998 op
= gimple_assign_rhs1 (next_stmt
);
3999 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
4000 &def_stmt
, &def
, &dt
))
4002 if (dump_enabled_p ())
4003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4004 "use not simple.\n");
4007 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4012 if (!vec_stmt
) /* transformation not required. */
4014 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
4015 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
4022 ensure_base_align (stmt_info
, dr
);
4026 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4027 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4029 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
4032 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
4036 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
4037 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
4046 grouped_store
= false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
4049 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4050 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4051 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4052 op
= gimple_assign_rhs1 (first_stmt
);
	/* VEC_NUM is the number of vect stmts to be created for this
	   group.  */
4057 vec_num
= group_size
;
4063 group_size
= vec_num
= 1;
4066 if (dump_enabled_p ())
4067 dump_printf_loc (MSG_NOTE
, vect_location
,
4068 "transform store. ncopies = %d\n", ncopies
);
4070 dr_chain
.create (group_size
);
4071 oprnds
.create (group_size
);
4073 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4074 gcc_assert (alignment_support_scheme
);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
4077 gcc_assert (!store_lanes_p
4078 || alignment_support_scheme
== dr_aligned
4079 || alignment_support_scheme
== dr_unaligned_supported
);
4082 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4084 aggr_type
= vectype
;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */
  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
4125 prev_stmt_info
= NULL
;
4126 for (j
= 0; j
< ncopies
; j
++)
	    /* Get vectorized arguments for SLP_NODE.  */
4135 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4136 NULL
, slp_node
, -1);
4138 vec_oprnd
= vec_oprnds
[0];
	      /* For interleaved stores we collect vectorized defs for all the
		 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
		 used as an input to vect_permute_store_chain(), and OPRNDS as
		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
		 OPRNDS are of size 1.  */
4149 next_stmt
= first_stmt
;
4150 for (i
= 0; i
< group_size
; i
++)
		  /* Since gaps are not supported for interleaved stores,
		     GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
		     there is no interleaving, GROUP_SIZE is 1, and only one
		     iteration of the loop will be executed.  */
4157 gcc_assert (next_stmt
4158 && gimple_assign_single_p (next_stmt
));
4159 op
= gimple_assign_rhs1 (next_stmt
);
4161 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4163 dr_chain
.quick_push (vec_oprnd
);
4164 oprnds
.quick_push (vec_oprnd
);
4165 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
	  /* We should have caught mismatched types earlier.  */
4170 gcc_assert (useless_type_conversion_p (vectype
,
4171 TREE_TYPE (vec_oprnd
)));
4172 bool simd_lane_access_p
4173 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
4174 if (simd_lane_access_p
4175 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
4176 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
4177 && integer_zerop (DR_OFFSET (first_dr
))
4178 && integer_zerop (DR_INIT (first_dr
))
4179 && alias_sets_conflict_p (get_alias_set (aggr_type
),
4180 get_alias_set (DR_REF (first_dr
))))
4182 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
4183 dataref_offset
= build_int_cst (reference_alias_ptr_type
4184 (DR_REF (first_dr
)), 0);
4189 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
4190 simd_lane_access_p
? loop
: NULL
,
4191 NULL_TREE
, &dummy
, gsi
, &ptr_incr
,
4192 simd_lane_access_p
, &inv_p
);
4193 gcc_assert (bb_vinfo
|| !inv_p
);
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
	     next copy.

	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
4204 for (i
= 0; i
< group_size
; i
++)
4207 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4209 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4210 dr_chain
[i
] = vec_oprnd
;
4211 oprnds
[i
] = vec_oprnd
;
4215 = int_const_binop (PLUS_EXPR
, dataref_offset
,
4216 TYPE_SIZE_UNIT (aggr_type
));
4218 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4219 TYPE_SIZE_UNIT (aggr_type
));
4226 /* Combine all the vectors into an array. */
4227 vec_array
= create_vector_array (vectype
, vec_num
);
4228 for (i
= 0; i
< vec_num
; i
++)
4230 vec_oprnd
= dr_chain
[i
];
4231 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
	  /* Emit:
	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
4236 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4237 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4238 gimple_call_set_lhs (new_stmt
, data_ref
);
4239 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4247 result_chain
.create (group_size
);
4249 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4253 next_stmt
= first_stmt
;
4254 for (i
= 0; i
< vec_num
; i
++)
4256 unsigned align
, misalign
;
4259 /* Bump the vector pointer. */
4260 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4264 vec_oprnd
= vec_oprnds
[i
];
4265 else if (grouped_store
)
4266 /* For grouped stores vectorized defs are interleaved in
4267 vect_permute_store_chain(). */
4268 vec_oprnd
= result_chain
[i
];
4270 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4273 : build_int_cst (reference_alias_ptr_type
4274 (DR_REF (first_dr
)), 0));
4275 align
= TYPE_ALIGN_UNIT (vectype
);
4276 if (aligned_access_p (first_dr
))
4278 else if (DR_MISALIGNMENT (first_dr
) == -1)
4280 TREE_TYPE (data_ref
)
4281 = build_aligned_type (TREE_TYPE (data_ref
),
4282 TYPE_ALIGN (elem_type
));
4283 align
= TYPE_ALIGN_UNIT (elem_type
);
4288 TREE_TYPE (data_ref
)
4289 = build_aligned_type (TREE_TYPE (data_ref
),
4290 TYPE_ALIGN (elem_type
));
4291 misalign
= DR_MISALIGNMENT (first_dr
);
4293 if (dataref_offset
== NULL_TREE
)
4294 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4297 /* Arguments are ready. Create the new vector stmt. */
4298 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4299 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4304 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4312 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4314 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4315 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4319 dr_chain
.release ();
4321 result_chain
.release ();
4322 vec_oprnds
.release ();
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */
4332 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4334 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4337 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4339 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4342 mask_elt_type
= lang_hooks
.types
.type_for_mode
4343 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4344 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
4346 mask_elts
= XALLOCAVEC (tree
, nunits
);
4347 for (i
= nunits
- 1; i
>= 0; i
--)
4348 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4349 mask_vec
= build_vector (mask_type
, mask_elts
);
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
4364 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4365 sel
= XALLOCAVEC (unsigned char, nunits
);
4367 for (i
= 0; i
< nunits
; ++i
)
4368 sel
[i
] = nunits
- 1 - i
;
4370 return vect_gen_perm_mask (vectype
, sel
);
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */
4379 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4380 gimple_stmt_iterator
*gsi
)
4382 tree vectype
= TREE_TYPE (x
);
4383 tree perm_dest
, data_ref
;
4386 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4387 data_ref
= make_ssa_name (perm_dest
, NULL
);
4389 /* Generate the permute statement. */
4390 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
4392 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4406 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4407 slp_tree slp_node
, slp_instance slp_node_instance
)
4410 tree vec_dest
= NULL
;
4411 tree data_ref
= NULL
;
4412 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4413 stmt_vec_info prev_stmt_info
;
4414 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4415 struct loop
*loop
= NULL
;
4416 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4417 bool nested_in_vect_loop
= false;
4418 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4419 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4422 enum machine_mode mode
;
4423 gimple new_stmt
= NULL
;
4425 enum dr_alignment_support alignment_support_scheme
;
4426 tree dataref_ptr
= NULL_TREE
;
4427 tree dataref_offset
= NULL_TREE
;
4428 gimple ptr_incr
= NULL
;
4429 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4431 int i
, j
, group_size
, group_gap
;
4432 tree msq
= NULL_TREE
, lsq
;
4433 tree offset
= NULL_TREE
;
4434 tree realignment_token
= NULL_TREE
;
4436 vec
<tree
> dr_chain
= vNULL
;
4437 bool grouped_load
= false;
4438 bool load_lanes_p
= false;
4441 bool negative
= false;
4442 bool compute_in_loop
= false;
4443 struct loop
*at_loop
;
4445 bool slp
= (slp_node
!= NULL
);
4446 bool slp_perm
= false;
4447 enum tree_code code
;
4448 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4451 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4452 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4453 int gather_scale
= 1;
4454 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4458 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4459 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4460 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4468 if (slp
|| PURE_SLP_STMT (stmt_info
))
4471 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4473 gcc_assert (ncopies
>= 1);
4475 /* FORNOW. This restriction should be relaxed. */
4476 if (nested_in_vect_loop
&& ncopies
> 1)
4478 if (dump_enabled_p ())
4479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4480 "multiple types in nested loop.\n");
4484 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4487 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4490 /* Is vectorizable load? */
4491 if (!is_gimple_assign (stmt
))
4494 scalar_dest
= gimple_assign_lhs (stmt
);
4495 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4498 code
= gimple_assign_rhs_code (stmt
);
4499 if (code
!= ARRAY_REF
4500 && code
!= BIT_FIELD_REF
4501 && code
!= INDIRECT_REF
4502 && code
!= COMPONENT_REF
4503 && code
!= IMAGPART_EXPR
4504 && code
!= REALPART_EXPR
4506 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4509 if (!STMT_VINFO_DATA_REF (stmt_info
))
4512 elem_type
= TREE_TYPE (vectype
);
4513 mode
= TYPE_MODE (vectype
);
4515 /* FORNOW. In some cases can vectorize even if data-type not supported
4516 (e.g. - data copies). */
4517 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4519 if (dump_enabled_p ())
4520 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4521 "Aligned load, but unsupported type.\n");
4525 /* Check if the load is a part of an interleaving chain. */
4526 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4528 grouped_load
= true;
4530 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4532 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4533 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4535 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4536 if (vect_load_lanes_supported (vectype
, group_size
))
4537 load_lanes_p
= true;
4538 else if (!vect_grouped_load_supported (vectype
, group_size
))
4544 if (STMT_VINFO_GATHER_P (stmt_info
))
4548 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4549 &gather_off
, &gather_scale
);
4550 gcc_assert (gather_decl
);
4551 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4552 &def_stmt
, &def
, &gather_dt
,
4553 &gather_off_vectype
))
4555 if (dump_enabled_p ())
4556 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4557 "gather index use not simple.\n");
4561 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4565 negative
= tree_int_cst_compare (nested_in_vect_loop
4566 ? STMT_VINFO_DR_STEP (stmt_info
)
4568 size_zero_node
) < 0;
4569 if (negative
&& ncopies
> 1)
4571 if (dump_enabled_p ())
4572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4573 "multiple types with negative step.\n");
4581 if (dump_enabled_p ())
4582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4583 "negative step for group load not supported"
4587 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4588 if (alignment_support_scheme
!= dr_aligned
4589 && alignment_support_scheme
!= dr_unaligned_supported
)
4591 if (dump_enabled_p ())
4592 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4593 "negative step but alignment required.\n");
4596 if (!perm_mask_for_reverse (vectype
))
4598 if (dump_enabled_p ())
4599 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4600 "negative step and reversing not supported."
4607 if (!vec_stmt
) /* transformation not required. */
4609 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4610 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4614 if (dump_enabled_p ())
4615 dump_printf_loc (MSG_NOTE
, vect_location
,
4616 "transform load. ncopies = %d\n", ncopies
);
4620 ensure_base_align (stmt_info
, dr
);
4622 if (STMT_VINFO_GATHER_P (stmt_info
))
4624 tree vec_oprnd0
= NULL_TREE
, op
;
4625 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4626 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4627 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4628 edge pe
= loop_preheader_edge (loop
);
4631 enum { NARROW
, NONE
, WIDEN
} modifier
;
4632 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4634 if (nunits
== gather_off_nunits
)
4636 else if (nunits
== gather_off_nunits
/ 2)
4638 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4641 for (i
= 0; i
< gather_off_nunits
; ++i
)
4642 sel
[i
] = i
| nunits
;
4644 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4645 gcc_assert (perm_mask
!= NULL_TREE
);
4647 else if (nunits
== gather_off_nunits
* 2)
4649 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4652 for (i
= 0; i
< nunits
; ++i
)
4653 sel
[i
] = i
< gather_off_nunits
4654 ? i
: i
+ nunits
- gather_off_nunits
;
4656 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4657 gcc_assert (perm_mask
!= NULL_TREE
);
4663 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4664 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4665 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4666 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4667 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4668 scaletype
= TREE_VALUE (arglist
);
4669 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4670 && types_compatible_p (srctype
, masktype
));
4672 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4674 ptr
= fold_convert (ptrtype
, gather_base
);
4675 if (!is_gimple_min_invariant (ptr
))
4677 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4678 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4679 gcc_assert (!new_bb
);
4682 /* Currently we support only unconditional gather loads,
4683 so mask should be all ones. */
4684 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4685 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4686 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4690 for (j
= 0; j
< 6; ++j
)
4692 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4693 mask
= build_real (TREE_TYPE (masktype
), r
);
4697 mask
= build_vector_from_val (masktype
, mask
);
4698 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4700 scale
= build_int_cst (scaletype
, gather_scale
);
4702 prev_stmt_info
= NULL
;
4703 for (j
= 0; j
< ncopies
; ++j
)
4705 if (modifier
== WIDEN
&& (j
& 1))
4706 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4707 perm_mask
, stmt
, gsi
);
4710 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4713 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4715 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4717 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4718 == TYPE_VECTOR_SUBPARTS (idxtype
));
4719 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4720 var
= make_ssa_name (var
, NULL
);
4721 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4723 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4725 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4730 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4732 if (!useless_type_conversion_p (vectype
, rettype
))
4734 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4735 == TYPE_VECTOR_SUBPARTS (rettype
));
4736 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4737 op
= make_ssa_name (var
, new_stmt
);
4738 gimple_call_set_lhs (new_stmt
, op
);
4739 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4740 var
= make_ssa_name (vec_dest
, NULL
);
4741 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4743 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4748 var
= make_ssa_name (vec_dest
, new_stmt
);
4749 gimple_call_set_lhs (new_stmt
, var
);
4752 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4754 if (modifier
== NARROW
)
4761 var
= permute_vec_elements (prev_res
, var
,
4762 perm_mask
, stmt
, gsi
);
4763 new_stmt
= SSA_NAME_DEF_STMT (var
);
4766 if (prev_stmt_info
== NULL
)
4767 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4769 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4770 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4774 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4776 gimple_stmt_iterator incr_gsi
;
4782 vec
<constructor_elt
, va_gc
> *v
= NULL
;
4783 gimple_seq stmts
= NULL
;
4784 tree stride_base
, stride_step
, alias_off
;
4786 gcc_assert (!nested_in_vect_loop
);
4789 = fold_build_pointer_plus
4790 (unshare_expr (DR_BASE_ADDRESS (dr
)),
4791 size_binop (PLUS_EXPR
,
4792 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
4793 convert_to_ptrofftype (DR_INIT (dr
))));
4794 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...  */
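      /* As a concrete (assumed) instance with nunits == 4 and ncopies == 1,
	 the inner loop body above expands to:

	   tmp0 = array[j];
	   tmp1 = array[j + stride];
	   tmp2 = array[j + 2*stride];
	   tmp3 = array[j + 3*stride];
	   vectemp = {tmp0, tmp1, tmp2, tmp3};

	 i.e. nunits scalar loads whose results are collected into a
	 CONSTRUCTOR and turned into one vector by vect_init_vector.  */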
4812 ivstep
= stride_step
;
4813 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4814 build_int_cst (TREE_TYPE (ivstep
), vf
));
4816 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4818 create_iv (stride_base
, ivstep
, NULL
,
4819 loop
, &incr_gsi
, insert_after
,
4821 incr
= gsi_stmt (incr_gsi
);
4822 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4824 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4826 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4828 prev_stmt_info
= NULL
;
4829 running_off
= offvar
;
4830 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
4831 for (j
= 0; j
< ncopies
; j
++)
4835 vec_alloc (v
, nunits
);
4836 for (i
= 0; i
< nunits
; i
++)
4838 tree newref
, newoff
;
4840 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
4841 running_off
, alias_off
);
4843 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4846 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4847 newoff
= copy_ssa_name (running_off
, NULL
);
4848 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4849 running_off
, stride_step
);
4850 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4852 running_off
= newoff
;
4855 vec_inv
= build_constructor (vectype
, v
);
4856 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4857 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4860 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4862 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4863 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4870 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4872 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
4873 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
4874 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4876 /* Check if the chain of loads is already vectorized. */
4877 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
4888 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4891 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4892 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4894 /* VEC_NUM is the number of vect stmts to be created for this group. */
4897 grouped_load
= false;
4898 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4899 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
4901 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
4905 vec_num
= group_size
;
4913 group_size
= vec_num
= 1;
4917 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4918 gcc_assert (alignment_support_scheme
);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
4921 gcc_assert (!load_lanes_p
4922 || alignment_support_scheme
== dr_aligned
4923 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
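  /* Illustrative sketch (an assumed small instance): for a chain of two
     loads and nunits == 4, the extract-even/extract-odd masks are

       VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, {0, 2, 4, 6} >
       VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, {1, 3, 5, 7} >

     so VS5 gathers the elements belonging to the first scalar load of the
     chain and VS6 those belonging to the second.  */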
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
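  /* Worked (assumed) example of the realignment scheme with 16-byte vectors:
     if p1 == 0x1004 then floor(p1) == 0x1000, so msq holds bytes
     0x1000..0x100f and lsq holds bytes 0x1010..0x101f.  realign_load then
     selects, from the concatenation of msq and lsq, the 16 bytes starting
     at the 4-byte misalignment encoded in realignment_token, i.e. bytes
     0x1004..0x1013, which is exactly the unaligned vector the scalar code
     reads.  */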
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
5028 if (nested_in_vect_loop
5029 && (TREE_INT_CST_LOW (DR_STEP (dr
))
5030 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
5032 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
5033 compute_in_loop
= true;
5036 if ((alignment_support_scheme
== dr_explicit_realign_optimized
5037 || alignment_support_scheme
== dr_explicit_realign
)
5038 && !compute_in_loop
)
5040 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
5041 alignment_support_scheme
, NULL_TREE
,
5043 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5045 phi
= SSA_NAME_DEF_STMT (msq
);
5046 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5053 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5056 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5058 aggr_type
= vectype
;
5060 prev_stmt_info
= NULL
;
5061 for (j
= 0; j
< ncopies
; j
++)
5063 /* 1. Create the vector or array pointer update chain. */
5066 bool simd_lane_access_p
5067 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5068 if (simd_lane_access_p
5069 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5070 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5071 && integer_zerop (DR_OFFSET (first_dr
))
5072 && integer_zerop (DR_INIT (first_dr
))
5073 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5074 get_alias_set (DR_REF (first_dr
)))
5075 && (alignment_support_scheme
== dr_aligned
5076 || alignment_support_scheme
== dr_unaligned_supported
))
5078 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5079 dataref_offset
= build_int_cst (reference_alias_ptr_type
5080 (DR_REF (first_dr
)), 0);
5085 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
5086 offset
, &dummy
, gsi
, &ptr_incr
,
5087 simd_lane_access_p
, &inv_p
);
5089 else if (dataref_offset
)
5090 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
5091 TYPE_SIZE_UNIT (aggr_type
));
5093 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5094 TYPE_SIZE_UNIT (aggr_type
));
5096 if (grouped_load
|| slp_perm
)
5097 dr_chain
.create (vec_num
);
5103 vec_array
= create_vector_array (vectype
, vec_num
);
	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
5107 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5108 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
5109 gimple_call_set_lhs (new_stmt
, vec_array
);
5110 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5112 /* Extract each vector into an SSA_NAME. */
5113 for (i
= 0; i
< vec_num
; i
++)
5115 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
5117 dr_chain
.quick_push (new_temp
);
5120 /* Record the mapping between SSA_NAMEs and statements. */
5121 vect_record_grouped_load_vectors (stmt
, dr_chain
);
5125 for (i
= 0; i
< vec_num
; i
++)
5128 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5131 /* 2. Create the vector-load in the loop. */
5132 switch (alignment_support_scheme
)
5135 case dr_unaligned_supported
:
5137 unsigned int align
, misalign
;
5140 = build2 (MEM_REF
, vectype
, dataref_ptr
,
5143 : build_int_cst (reference_alias_ptr_type
5144 (DR_REF (first_dr
)), 0));
5145 align
= TYPE_ALIGN_UNIT (vectype
);
5146 if (alignment_support_scheme
== dr_aligned
)
5148 gcc_assert (aligned_access_p (first_dr
));
5151 else if (DR_MISALIGNMENT (first_dr
) == -1)
5153 TREE_TYPE (data_ref
)
5154 = build_aligned_type (TREE_TYPE (data_ref
),
5155 TYPE_ALIGN (elem_type
));
5156 align
= TYPE_ALIGN_UNIT (elem_type
);
5161 TREE_TYPE (data_ref
)
5162 = build_aligned_type (TREE_TYPE (data_ref
),
5163 TYPE_ALIGN (elem_type
));
5164 misalign
= DR_MISALIGNMENT (first_dr
);
5166 if (dataref_offset
== NULL_TREE
)
5167 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
5171 case dr_explicit_realign
:
5176 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5178 if (compute_in_loop
)
5179 msq
= vect_setup_realignment (first_stmt
, gsi
,
5181 dr_explicit_realign
,
5184 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
5185 new_stmt
= gimple_build_assign_with_ops
5186 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
5188 (TREE_TYPE (dataref_ptr
),
5189 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5190 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5192 = build2 (MEM_REF
, vectype
, ptr
,
5193 build_int_cst (reference_alias_ptr_type
5194 (DR_REF (first_dr
)), 0));
5195 vec_dest
= vect_create_destination_var (scalar_dest
,
5197 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5198 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5199 gimple_assign_set_lhs (new_stmt
, new_temp
);
5200 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
5201 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
5202 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5205 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5206 TYPE_SIZE_UNIT (elem_type
));
5207 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5208 new_stmt
= gimple_build_assign_with_ops
5209 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5212 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5213 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
5214 gimple_assign_set_lhs (new_stmt
, ptr
);
5215 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5217 = build2 (MEM_REF
, vectype
, ptr
,
5218 build_int_cst (reference_alias_ptr_type
5219 (DR_REF (first_dr
)), 0));
5222 case dr_explicit_realign_optimized
:
5223 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5224 new_stmt
= gimple_build_assign_with_ops
5225 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5227 (TREE_TYPE (dataref_ptr
),
5228 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5229 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5231 = build2 (MEM_REF
, vectype
, new_temp
,
5232 build_int_cst (reference_alias_ptr_type
5233 (DR_REF (first_dr
)), 0));
5238 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5239 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5240 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5241 gimple_assign_set_lhs (new_stmt
, new_temp
);
5242 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
	  /* 3. Handle explicit realignment if necessary/supported.
	     Create in loop:
	       vec_dest = realign_load (msq, lsq, realignment_token)  */
5247 if (alignment_support_scheme
== dr_explicit_realign_optimized
5248 || alignment_support_scheme
== dr_explicit_realign
)
5250 lsq
= gimple_assign_lhs (new_stmt
);
5251 if (!realignment_token
)
5252 realignment_token
= dataref_ptr
;
5253 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5255 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
5258 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5259 gimple_assign_set_lhs (new_stmt
, new_temp
);
5260 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5262 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5265 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5266 add_phi_arg (phi
, lsq
,
5267 loop_latch_edge (containing_loop
),
5273 /* 4. Handle invariant-load. */
5274 if (inv_p
&& !bb_vinfo
)
5276 gimple_stmt_iterator gsi2
= *gsi
;
5277 gcc_assert (!grouped_load
);
5279 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5281 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5286 tree perm_mask
= perm_mask_for_reverse (vectype
);
5287 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5288 perm_mask
, stmt
, gsi
);
5289 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5292 /* Collect vector loads and later create their permutation in
5293 vect_transform_grouped_load (). */
5294 if (grouped_load
|| slp_perm
)
5295 dr_chain
.quick_push (new_temp
);
5297 /* Store vector loads in the corresponding SLP_NODE. */
5298 if (slp
&& !slp_perm
)
5299 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5301 /* Bump the vector pointer to account for a gap. */
5302 if (slp
&& group_gap
!= 0)
5304 tree bump
= size_binop (MULT_EXPR
,
5305 TYPE_SIZE_UNIT (elem_type
),
5306 size_int (group_gap
));
5307 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5312 if (slp
&& !slp_perm
)
5317 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
5318 slp_node_instance
, false))
5320 dr_chain
.release ();
5329 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5330 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5335 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5337 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5338 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5341 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
5360 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5361 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5365 enum vect_def_type dt
;
5366 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5368 if (!COMPARISON_CLASS_P (cond
))
5371 lhs
= TREE_OPERAND (cond
, 0);
5372 rhs
= TREE_OPERAND (cond
, 1);
5374 if (TREE_CODE (lhs
) == SSA_NAME
)
5376 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5377 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5378 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5381 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5382 && TREE_CODE (lhs
) != FIXED_CST
)
5385 if (TREE_CODE (rhs
) == SSA_NAME
)
5387 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5388 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5389 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5392 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5393 && TREE_CODE (rhs
) != FIXED_CST
)
5396 *comp_vectype
= vectype1
? vectype1
: vectype2
;
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
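/* As an (assumed) illustration of what this function produces: a scalar
   statement

     x = a < b ? c : d;

   is vectorized into a VEC_COND_EXPR whose first operand is the vectorized
   comparison,

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vectorized defs of a, b, c and d.  */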
5414 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5415 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5418 tree scalar_dest
= NULL_TREE
;
5419 tree vec_dest
= NULL_TREE
;
5420 tree cond_expr
, then_clause
, else_clause
;
5421 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5422 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5423 tree comp_vectype
= NULL_TREE
;
5424 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5425 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5426 tree vec_compare
, vec_cond_expr
;
5428 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5430 enum vect_def_type dt
, dts
[4];
5431 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5433 enum tree_code code
;
5434 stmt_vec_info prev_stmt_info
= NULL
;
5436 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5437 vec
<tree
> vec_oprnds0
= vNULL
;
5438 vec
<tree
> vec_oprnds1
= vNULL
;
5439 vec
<tree
> vec_oprnds2
= vNULL
;
5440 vec
<tree
> vec_oprnds3
= vNULL
;
5443 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5446 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5448 gcc_assert (ncopies
>= 1);
5449 if (reduc_index
&& ncopies
> 1)
5450 return false; /* FORNOW */
5452 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5455 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5458 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5459 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5463 /* FORNOW: not yet supported. */
5464 if (STMT_VINFO_LIVE_P (stmt_info
))
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5468 "value used after loop.\n");
5472 /* Is vectorizable conditional operation? */
5473 if (!is_gimple_assign (stmt
))
5476 code
= gimple_assign_rhs_code (stmt
);
5478 if (code
!= COND_EXPR
)
5481 cond_expr
= gimple_assign_rhs1 (stmt
);
5482 then_clause
= gimple_assign_rhs2 (stmt
);
5483 else_clause
= gimple_assign_rhs3 (stmt
);
5485 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5490 if (TREE_CODE (then_clause
) == SSA_NAME
)
5492 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5493 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5494 &then_def_stmt
, &def
, &dt
))
5497 else if (TREE_CODE (then_clause
) != INTEGER_CST
5498 && TREE_CODE (then_clause
) != REAL_CST
5499 && TREE_CODE (then_clause
) != FIXED_CST
)
5502 if (TREE_CODE (else_clause
) == SSA_NAME
)
5504 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5505 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5506 &else_def_stmt
, &def
, &dt
))
5509 else if (TREE_CODE (else_clause
) != INTEGER_CST
5510 && TREE_CODE (else_clause
) != REAL_CST
5511 && TREE_CODE (else_clause
) != FIXED_CST
)
5514 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
5515 /* The result of a vector comparison should be signed type. */
5516 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
5517 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
5518 if (vec_cmp_type
== NULL_TREE
)
5523 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5524 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5531 vec_oprnds0
.create (1);
5532 vec_oprnds1
.create (1);
5533 vec_oprnds2
.create (1);
5534 vec_oprnds3
.create (1);
5538 scalar_dest
= gimple_assign_lhs (stmt
);
5539 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5541 /* Handle cond expr. */
5542 for (j
= 0; j
< ncopies
; j
++)
5544 gimple new_stmt
= NULL
;
5551 vec
<vec
<tree
> > vec_defs
;
5553 vec_defs
.create (4);
5554 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
5555 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
5556 ops
.safe_push (then_clause
);
5557 ops
.safe_push (else_clause
);
5558 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5559 vec_oprnds3
= vec_defs
.pop ();
5560 vec_oprnds2
= vec_defs
.pop ();
5561 vec_oprnds1
= vec_defs
.pop ();
5562 vec_oprnds0
= vec_defs
.pop ();
5565 vec_defs
.release ();
5571 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5573 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5574 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5577 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5579 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5580 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5581 if (reduc_index
== 1)
5582 vec_then_clause
= reduc_def
;
5585 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5587 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5588 NULL
, >emp
, &def
, &dts
[2]);
5590 if (reduc_index
== 2)
5591 vec_else_clause
= reduc_def
;
5594 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5596 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5597 NULL
, >emp
, &def
, &dts
[3]);
5603 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5604 vec_oprnds0
.pop ());
5605 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5606 vec_oprnds1
.pop ());
5607 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5608 vec_oprnds2
.pop ());
5609 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5610 vec_oprnds3
.pop ());
5615 vec_oprnds0
.quick_push (vec_cond_lhs
);
5616 vec_oprnds1
.quick_push (vec_cond_rhs
);
5617 vec_oprnds2
.quick_push (vec_then_clause
);
5618 vec_oprnds3
.quick_push (vec_else_clause
);
5621 /* Arguments are ready. Create the new vector stmt. */
5622 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
5624 vec_cond_rhs
= vec_oprnds1
[i
];
5625 vec_then_clause
= vec_oprnds2
[i
];
5626 vec_else_clause
= vec_oprnds3
[i
];
5628 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
5629 vec_cond_lhs
, vec_cond_rhs
);
5630 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5631 vec_compare
, vec_then_clause
, vec_else_clause
);
5633 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5634 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5635 gimple_assign_set_lhs (new_stmt
, new_temp
);
5636 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5638 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5645 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5647 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5649 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5652 vec_oprnds0
.release ();
5653 vec_oprnds1
.release ();
5654 vec_oprnds2
.release ();
5655 vec_oprnds3
.release ();
5661 /* Make sure the statement is vectorizable. */
5664 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5666 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5667 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5668 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5670 tree scalar_type
, vectype
;
5671 gimple pattern_stmt
;
5672 gimple_seq pattern_def_seq
;
5674 if (dump_enabled_p ())
5676 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
5677 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5678 dump_printf (MSG_NOTE
, "\n");
5681 if (gimple_has_volatile_ops (stmt
))
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5685 "not vectorized: stmt has volatile operands\n");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; in that case we do not analyze pattern stmts here, as the
     pattern stmts are already part of the SLP instance.  */
5704 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5705 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5706 && !STMT_VINFO_LIVE_P (stmt_info
))
5708 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5710 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5711 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5713 /* Analyze PATTERN_STMT instead of the original stmt. */
5714 stmt
= pattern_stmt
;
5715 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5716 if (dump_enabled_p ())
5718 dump_printf_loc (MSG_NOTE
, vect_location
,
5719 "==> examining pattern statement: ");
5720 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5721 dump_printf (MSG_NOTE
, "\n");
5726 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
5732 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5735 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5736 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5738 /* Analyze PATTERN_STMT too. */
5739 if (dump_enabled_p ())
5741 dump_printf_loc (MSG_NOTE
, vect_location
,
5742 "==> examining pattern statement: ");
5743 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5744 dump_printf (MSG_NOTE
, "\n");
5747 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5751 if (is_pattern_stmt_p (stmt_info
)
5753 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5755 gimple_stmt_iterator si
;
5757 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5759 gimple pattern_def_stmt
= gsi_stmt (si
);
5760 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5761 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5763 /* Analyze def stmt of STMT if it's a pattern stmt. */
5764 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_NOTE
, vect_location
,
5767 "==> examining pattern def statement: ");
5768 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
5769 dump_printf (MSG_NOTE
, "\n");
5772 if (!vect_analyze_stmt (pattern_def_stmt
,
5773 need_to_vectorize
, node
))
5779 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5781 case vect_internal_def
:
5784 case vect_reduction_def
:
5785 case vect_nested_cycle
:
5786 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5787 || relevance
== vect_used_in_outer_by_reduction
5788 || relevance
== vect_unused_in_scope
));
5791 case vect_induction_def
:
5792 case vect_constant_def
:
5793 case vect_external_def
:
5794 case vect_unknown_def_type
:
5801 gcc_assert (PURE_SLP_STMT (stmt_info
));
5803 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5804 if (dump_enabled_p ())
5806 dump_printf_loc (MSG_NOTE
, vect_location
,
5807 "get vectype for scalar type: ");
5808 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
5809 dump_printf (MSG_NOTE
, "\n");
5812 vectype
= get_vectype_for_scalar_type (scalar_type
);
5815 if (dump_enabled_p ())
5817 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5818 "not SLPed: unsupported data-type ");
5819 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5821 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5826 if (dump_enabled_p ())
5828 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
5829 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
5830 dump_printf (MSG_NOTE
, "\n");
5833 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5836 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5838 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5839 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5840 *need_to_vectorize
= true;
5845 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5846 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5847 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5848 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5849 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5850 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5851 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5852 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5853 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5854 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5855 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5859 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5860 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5861 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5862 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5863 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5864 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5865 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5866 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5871 if (dump_enabled_p ())
5873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5874 "not vectorized: relevant stmt not ");
5875 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5876 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5877 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5886 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5887 need extra handling, except for vectorizable reductions. */
5888 if (STMT_VINFO_LIVE_P (stmt_info
)
5889 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5890 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5894 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5897 "not vectorized: live stmt not ");
5898 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5900 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5910 /* Function vect_transform_stmt.
5912 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5915 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5916 bool *grouped_store
, slp_tree slp_node
,
5917 slp_instance slp_node_instance
)
5919 bool is_store
= false;
5920 gimple vec_stmt
= NULL
;
5921 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5924 switch (STMT_VINFO_TYPE (stmt_info
))
5926 case type_demotion_vec_info_type
:
5927 case type_promotion_vec_info_type
:
5928 case type_conversion_vec_info_type
:
5929 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5933 case induc_vec_info_type
:
5934 gcc_assert (!slp_node
);
5935 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5939 case shift_vec_info_type
:
5940 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5944 case op_vec_info_type
:
5945 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5949 case assignment_vec_info_type
:
5950 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5954 case load_vec_info_type
:
5955 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5960 case store_vec_info_type
:
5961 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	/* In case of interleaving, the whole chain is vectorized when the
	   last store in the chain is reached.  Store stmts before the last
	   one are skipped, and their vec_stmt_info shouldn't be freed
	   meanwhile.  */
5969 *grouped_store
= true;
5970 if (STMT_VINFO_VEC_STMT (stmt_info
))
5977 case condition_vec_info_type
:
5978 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5982 case call_vec_info_type
:
5983 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5984 stmt
= gsi_stmt (*gsi
);
5987 case reduc_vec_info_type
:
5988 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5993 if (!STMT_VINFO_LIVE_P (stmt_info
))
5995 if (dump_enabled_p ())
5996 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5997 "stmt not supported.\n");
6002 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
6003 is being vectorized, but outside the immediately enclosing loop. */
6005 && STMT_VINFO_LOOP_VINFO (stmt_info
)
6006 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
6007 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
6008 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
6009 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
6010 || STMT_VINFO_RELEVANT (stmt_info
) ==
6011 vect_used_in_outer_by_reduction
))
6013 struct loop
*innerloop
= LOOP_VINFO_LOOP (
6014 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
6015 imm_use_iterator imm_iter
;
6016 use_operand_p use_p
;
6020 if (dump_enabled_p ())
6021 dump_printf_loc (MSG_NOTE
, vect_location
,
6022 "Record the vdef for outer-loop vectorization.\n");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
6027 if (gimple_code (stmt
) == GIMPLE_PHI
)
6028 scalar_dest
= PHI_RESULT (stmt
);
6030 scalar_dest
= gimple_assign_lhs (stmt
);
6032 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
6034 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
6036 exit_phi
= USE_STMT (use_p
);
6037 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
6042 /* Handle stmts whose DEF is used outside the loop-nest that is
6043 being vectorized. */
6044 if (STMT_VINFO_LIVE_P (stmt_info
)
6045 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
6047 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
6052 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */
6062 vect_remove_stores (gimple first_stmt
)
6064 gimple next
= first_stmt
;
6066 gimple_stmt_iterator next_si
;
6070 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
6072 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
6073 if (is_pattern_stmt_p (stmt_info
))
6074 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
6075 /* Free the attached stmt_vec_info and remove the stmt. */
6076 next_si
= gsi_for_stmt (next
);
6077 unlink_stmt_vdef (next
);
6078 gsi_remove (&next_si
, true);
6079 release_defs (next
);
6080 free_stmt_vec_info (next
);
6086 /* Function new_stmt_vec_info.
6088 Create and initialize a new stmt_vec_info struct for STMT. */
6091 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
6092 bb_vec_info bb_vinfo
)
6095 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
6097 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
6098 STMT_VINFO_STMT (res
) = stmt
;
6099 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
6100 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
6101 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
6102 STMT_VINFO_LIVE_P (res
) = false;
6103 STMT_VINFO_VECTYPE (res
) = NULL
;
6104 STMT_VINFO_VEC_STMT (res
) = NULL
;
6105 STMT_VINFO_VECTORIZABLE (res
) = true;
6106 STMT_VINFO_IN_PATTERN_P (res
) = false;
6107 STMT_VINFO_RELATED_STMT (res
) = NULL
;
6108 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
6109 STMT_VINFO_DATA_REF (res
) = NULL
;
6111 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
6112 STMT_VINFO_DR_OFFSET (res
) = NULL
;
6113 STMT_VINFO_DR_INIT (res
) = NULL
;
6114 STMT_VINFO_DR_STEP (res
) = NULL
;
6115 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
6117 if (gimple_code (stmt
) == GIMPLE_PHI
6118 && is_loop_header_bb_p (gimple_bb (stmt
)))
6119 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
6121 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
6123 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
6124 STMT_SLP_TYPE (res
) = loop_vect
;
6125 GROUP_FIRST_ELEMENT (res
) = NULL
;
6126 GROUP_NEXT_ELEMENT (res
) = NULL
;
6127 GROUP_SIZE (res
) = 0;
6128 GROUP_STORE_COUNT (res
) = 0;
6129 GROUP_GAP (res
) = 0;
6130 GROUP_SAME_DR_STMT (res
) = NULL
;
6136 /* Create a hash table for stmt_vec_info. */
6139 init_stmt_vec_info_vec (void)
6141 gcc_assert (!stmt_vec_info_vec
.exists ());
6142 stmt_vec_info_vec
.create (50);
6146 /* Free hash table for stmt_vec_info. */
6149 free_stmt_vec_info_vec (void)
6153 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
6155 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
6156 gcc_assert (stmt_vec_info_vec
.exists ());
6157 stmt_vec_info_vec
.release ();
6161 /* Free stmt vectorization related info. */
6164 free_stmt_vec_info (gimple stmt
)
6166 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
6175 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
6177 stmt_vec_info patt_info
6178 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6181 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
6184 gimple_stmt_iterator si
;
6185 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
6186 free_stmt_vec_info (gsi_stmt (si
));
6188 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
6192 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
6193 set_vinfo_for_stmt (stmt
, NULL
);
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */
6204 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
6206 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
6207 enum machine_mode simd_mode
;
6208 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
6215 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
6216 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
6225 if (INTEGRAL_TYPE_P (scalar_type
)
6226 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
6227 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
6228 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
6229 TYPE_UNSIGNED (scalar_type
));
6231 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6232 When the component mode passes the above test simply use a type
6233 corresponding to that mode. The theory is that any use that
6234 would cause problems with this will disable vectorization anyway. */
6235 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
6236 && !INTEGRAL_TYPE_P (scalar_type
))
6237 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
6241 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
6242 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
6243 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
6247 if (scalar_type
== NULL_TREE
)
6250 /* If no size was supplied use the mode the target prefers. Otherwise
6251 lookup a vector mode of the specified size. */
6253 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6255 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6256 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6260 vectype
= build_vector_type (scalar_type
, nunits
);
6262 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6263 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
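
/* An illustrative sketch, not from the original file: querying the vector
   type for 'int'.  On a target whose vector size is 16 bytes this would
   typically yield a 4-lane integer vector type (e.g. V4SImode); the
   assertion merely documents that expectation for a 4-byte 'int'.  Kept
   under #if 0 so it is not built.  */
#if 0
static void
example_int_vectype (void)
{
  /* Ask for a vector of 'int' elements with a total size of 16 bytes.  */
  tree vt = get_vectype_for_scalar_type_and_size (integer_type_node, 16);

  if (vt != NULL_TREE)
    /* 16 bytes / 4-byte elements = 4 lanes (assuming a 32-bit 'int').  */
    gcc_assert (TYPE_VECTOR_SUBPARTS (vt) == 4);
}
#endif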

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;

  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));

  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
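
/* A simplified caller-side sketch, not part of the original sources: this is
   roughly how the analysis routines classify an operand of STMT before
   deciding whether it can be vectorized.  Real callers accept additional
   def types (reductions, inductions, ...) depending on context; the function
   and variable names here are illustrative and the block is kept under
   #if 0 so it is not built.  */
#if 0
static bool
example_classify_operand (tree op, gimple stmt, loop_vec_info loop_vinfo,
                          bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
                           &def_stmt, &def, &dt))
    return false;   /* E.g. defined by a previous loop iteration.  */

  /* Constants and external (invariant) defs need no vector type; internal
     defs are vectorized together with their defining statement.  */
  return (dt == vect_constant_def
          || dt == vect_external_def
          || dt == vect_internal_def);
}
#endif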

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
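
/* A simplified caller sketch, not from the original sources: checking
   whether a widened conversion (e.g. char -> int) of STMT is supported,
   much as vectorizable_conversion queries this routine.  VECTYPE_IN and
   VECTYPE_OUT are whatever get_vectype_for_scalar_type returned for the
   input and output scalar types; names are illustrative and the block is
   kept under #if 0 so it is not built.  */
#if 0
static bool
example_check_widening (gimple stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok;

  /* For a widening conversion the scalar code is NOP_EXPR.  On success
     CODE1/CODE2 name the lo/hi (or even/odd) vector codes, MULTI_STEP_CVT
     counts the intermediate steps and INTERM_TYPES holds their types.  */
  ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                       vectype_in, &code1, &code2,
                                       &multi_step_cvt, &interm_types);

  /* A real caller would keep INTERM_TYPES around to emit the intermediate
     statements; here we only answer yes/no.  */
  interm_types.release ();
  return ok;
}
#endif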

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}