/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
58 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
60 return STMT_VINFO_VECTYPE (stmt_info
);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
66 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
68 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
69 basic_block bb
= gimple_bb (stmt
);
70 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
76 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
78 return (bb
->loop_father
== loop
->inner
);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
86 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
87 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
88 int misalign
, enum vect_cost_model_location where
)
92 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
93 stmt_info_for_cost si
= { count
, kind
,
94 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
96 body_cost_vec
->safe_push (si
);
98 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
101 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
102 count
, kind
, stmt_info
, misalign
, where
);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
184 enum vect_relevant relevant
, bool live_p
)
186 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
187 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
188 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
189 gimple
*pattern_stmt
;
191 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE
, vect_location
,
194 "mark relevant %d, live %d: ", relevant
, live_p
);
195 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
198 /* If this stmt is an original stmt in a pattern, we might need to mark its
199 related pattern stmt instead of the original stmt. However, such stmts
200 may have their own uses that are not in any pattern, in such cases the
201 stmt itself should be marked. */
202 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
204 /* This is the last stmt in a sequence that was detected as a
205 pattern that can potentially be vectorized. Don't mark the stmt
206 as relevant/live because it's not going to be vectorized.
207 Instead mark the pattern-stmt that replaces it. */
209 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
211 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE
, vect_location
,
213 "last stmt in pattern. don't mark"
214 " relevant/live.\n");
215 stmt_info
= vinfo_for_stmt (pattern_stmt
);
216 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
217 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
218 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
222 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
223 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
224 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
226 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
227 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
229 if (dump_enabled_p ())
230 dump_printf_loc (MSG_NOTE
, vect_location
,
231 "already marked relevant/live.\n");
235 worklist
->safe_push (stmt
);
239 /* Function vect_stmt_relevant_p.
241 Return true if STMT in loop that is represented by LOOP_VINFO is
242 "relevant for vectorization".
244 A stmt is considered "relevant for vectorization" if:
245 - it has uses outside the loop.
246 - it has vdefs (it alters memory).
247 - control stmts in the loop (except for the exit condition).
249 CHECKME: what other side effects would the vectorizer allow? */
252 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
253 enum vect_relevant
*relevant
, bool *live_p
)
255 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
257 imm_use_iterator imm_iter
;
261 *relevant
= vect_unused_in_scope
;
264 /* cond stmt other than loop exit cond. */
265 if (is_ctrl_stmt (stmt
)
266 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
267 != loop_exit_ctrl_vec_info_type
)
268 *relevant
= vect_used_in_scope
;
270 /* changing memory. */
271 if (gimple_code (stmt
) != GIMPLE_PHI
)
272 if (gimple_vdef (stmt
)
273 && !gimple_clobber_p (stmt
))
275 if (dump_enabled_p ())
276 dump_printf_loc (MSG_NOTE
, vect_location
,
277 "vec_stmt_relevant_p: stmt has vdefs.\n");
278 *relevant
= vect_used_in_scope
;
281 /* uses outside the loop. */
282 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
284 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
286 basic_block bb
= gimple_bb (USE_STMT (use_p
));
287 if (!flow_bb_inside_loop_p (loop
, bb
))
289 if (dump_enabled_p ())
290 dump_printf_loc (MSG_NOTE
, vect_location
,
291 "vec_stmt_relevant_p: used out of loop.\n");
293 if (is_gimple_debug (USE_STMT (use_p
)))
296 /* We expect all such uses to be in the loop exit phis
297 (because of loop closed form) */
298 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
299 gcc_assert (bb
== single_exit (loop
)->dest
);
306 return (*live_p
|| *relevant
);
310 /* Function exist_non_indexing_operands_for_use_p
312 USE is one of the uses attached to STMT. Check if USE is
313 used in STMT for anything other than indexing an array. */
316 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
319 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
321 /* USE corresponds to some operand in STMT. If there is no data
322 reference in STMT, then any operand that corresponds to USE
323 is not indexing an array. */
324 if (!STMT_VINFO_DATA_REF (stmt_info
))
327 /* STMT has a data_ref. FORNOW this means that its of one of
331 (This should have been verified in analyze_data_refs).
333 'var' in the second case corresponds to a def, not a use,
334 so USE cannot correspond to any operands that are not used
337 Therefore, all we need to check is if STMT falls into the
338 first case, and whether var corresponds to USE. */
340 if (!gimple_assign_copy_p (stmt
))
342 if (is_gimple_call (stmt
)
343 && gimple_call_internal_p (stmt
))
344 switch (gimple_call_internal_fn (stmt
))
347 operand
= gimple_call_arg (stmt
, 3);
352 operand
= gimple_call_arg (stmt
, 2);
362 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
364 operand
= gimple_assign_rhs1 (stmt
);
365 if (TREE_CODE (operand
) != SSA_NAME
)
376 Function process_use.
379 - a USE in STMT in a loop represented by LOOP_VINFO
380 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
381 that defined USE. This is done by calling mark_relevant and passing it
382 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
383 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
387 Generally, LIVE_P and RELEVANT are used to define the liveness and
388 relevance info of the DEF_STMT of this USE:
389 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
390 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
392 - case 1: If USE is used only for address computations (e.g. array indexing),
393 which does not need to be directly vectorized, then the liveness/relevance
394 of the respective DEF_STMT is left unchanged.
395 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
396 skip DEF_STMT cause it had already been processed.
397 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
398 be modified accordingly.
400 Return true if everything is as expected. Return false otherwise. */
403 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
404 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
407 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
408 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
409 stmt_vec_info dstmt_vinfo
;
410 basic_block bb
, def_bb
;
412 enum vect_def_type dt
;
414 /* case 1: we are only interested in uses that need to be vectorized. Uses
415 that are used for address computation are not considered relevant. */
416 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
419 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
421 if (dump_enabled_p ())
422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
423 "not vectorized: unsupported use in stmt.\n");
427 if (!def_stmt
|| gimple_nop_p (def_stmt
))
430 def_bb
= gimple_bb (def_stmt
);
431 if (!flow_bb_inside_loop_p (loop
, def_bb
))
433 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
438 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
439 DEF_STMT must have already been processed, because this should be the
440 only way that STMT, which is a reduction-phi, was put in the worklist,
441 as there should be no other uses for DEF_STMT in the loop. So we just
442 check that everything is as expected, and we are done. */
443 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
444 bb
= gimple_bb (stmt
);
445 if (gimple_code (stmt
) == GIMPLE_PHI
446 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
447 && gimple_code (def_stmt
) != GIMPLE_PHI
448 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
449 && bb
->loop_father
== def_bb
->loop_father
)
451 if (dump_enabled_p ())
452 dump_printf_loc (MSG_NOTE
, vect_location
,
453 "reduc-stmt defining reduc-phi in the same nest.\n");
454 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
455 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
456 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
457 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
458 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
462 /* case 3a: outer-loop stmt defining an inner-loop stmt:
463 outer-loop-header-bb:
469 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_NOTE
, vect_location
,
473 "outer-loop def-stmt defining inner-loop stmt.\n");
477 case vect_unused_in_scope
:
478 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
479 vect_used_in_scope
: vect_unused_in_scope
;
482 case vect_used_in_outer_by_reduction
:
483 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
484 relevant
= vect_used_by_reduction
;
487 case vect_used_in_outer
:
488 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
489 relevant
= vect_used_in_scope
;
492 case vect_used_in_scope
:
500 /* case 3b: inner-loop stmt defining an outer-loop stmt:
501 outer-loop-header-bb:
505 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
507 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
509 if (dump_enabled_p ())
510 dump_printf_loc (MSG_NOTE
, vect_location
,
511 "inner-loop def-stmt defining outer-loop stmt.\n");
515 case vect_unused_in_scope
:
516 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
517 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
518 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
521 case vect_used_by_reduction
:
522 relevant
= vect_used_in_outer_by_reduction
;
525 case vect_used_in_scope
:
526 relevant
= vect_used_in_outer
;
534 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
);
539 /* Function vect_mark_stmts_to_be_vectorized.
541 Not all stmts in the loop need to be vectorized. For example:
550 Stmt 1 and 3 do not need to be vectorized, because loop control and
551 addressing of vectorized data-refs are handled differently.
553 This pass detects such stmts. */
556 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
558 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
559 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
560 unsigned int nbbs
= loop
->num_nodes
;
561 gimple_stmt_iterator si
;
564 stmt_vec_info stmt_vinfo
;
568 enum vect_relevant relevant
, tmp_relevant
;
569 enum vect_def_type def_type
;
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE
, vect_location
,
573 "=== vect_mark_stmts_to_be_vectorized ===\n");
575 auto_vec
<gimple
*, 64> worklist
;
577 /* 1. Init worklist. */
578 for (i
= 0; i
< nbbs
; i
++)
581 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
584 if (dump_enabled_p ())
586 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
587 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
590 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
591 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
593 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
595 stmt
= gsi_stmt (si
);
596 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
599 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
602 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
603 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
607 /* 2. Process_worklist */
608 while (worklist
.length () > 0)
613 stmt
= worklist
.pop ();
614 if (dump_enabled_p ())
616 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
617 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
620 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
621 (DEF_STMT) as relevant/irrelevant and live/dead according to the
622 liveness and relevance properties of STMT. */
623 stmt_vinfo
= vinfo_for_stmt (stmt
);
624 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
625 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
627 /* Generally, the liveness and relevance properties of STMT are
628 propagated as is to the DEF_STMTs of its USEs:
629 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
630 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
632 One exception is when STMT has been identified as defining a reduction
633 variable; in this case we set the liveness/relevance as follows:
635 relevant = vect_used_by_reduction
636 This is because we distinguish between two kinds of relevant stmts -
637 those that are used by a reduction computation, and those that are
638 (also) used by a regular computation. This allows us later on to
639 identify stmts that are used solely by a reduction, and therefore the
640 order of the results that they produce does not have to be kept. */
642 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
643 tmp_relevant
= relevant
;
646 case vect_reduction_def
:
647 switch (tmp_relevant
)
649 case vect_unused_in_scope
:
650 relevant
= vect_used_by_reduction
;
653 case vect_used_by_reduction
:
654 if (gimple_code (stmt
) == GIMPLE_PHI
)
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
661 "unsupported use of reduction.\n");
668 case vect_nested_cycle
:
669 if (tmp_relevant
!= vect_unused_in_scope
670 && tmp_relevant
!= vect_used_in_outer_by_reduction
671 && tmp_relevant
!= vect_used_in_outer
)
673 if (dump_enabled_p ())
674 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
675 "unsupported use of nested cycle.\n");
683 case vect_double_reduction_def
:
684 if (tmp_relevant
!= vect_unused_in_scope
685 && tmp_relevant
!= vect_used_by_reduction
)
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
689 "unsupported use of double reduction.\n");
701 if (is_pattern_stmt_p (stmt_vinfo
))
703 /* Pattern statements are not inserted into the code, so
704 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
705 have to scan the RHS or function arguments instead. */
706 if (is_gimple_assign (stmt
))
708 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
709 tree op
= gimple_assign_rhs1 (stmt
);
712 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
714 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
715 live_p
, relevant
, &worklist
, false)
716 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
717 live_p
, relevant
, &worklist
, false))
721 for (; i
< gimple_num_ops (stmt
); i
++)
723 op
= gimple_op (stmt
, i
);
724 if (TREE_CODE (op
) == SSA_NAME
725 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
730 else if (is_gimple_call (stmt
))
732 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
734 tree arg
= gimple_call_arg (stmt
, i
);
735 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
742 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
744 tree op
= USE_FROM_PTR (use_p
);
745 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
750 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
753 tree decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
755 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
759 } /* while worklist */
765 /* Function vect_model_simple_cost.
767 Models cost for simple operations, i.e. those that only emit ncopies of a
768 single op. Right now, this does not account for multiple insns that could
769 be generated for the single vector op. We will handle that shortly. */
772 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
773 enum vect_def_type
*dt
,
774 stmt_vector_for_cost
*prologue_cost_vec
,
775 stmt_vector_for_cost
*body_cost_vec
)
778 int inside_cost
= 0, prologue_cost
= 0;
780 /* The SLP costs were already calculated during SLP tree build. */
781 if (PURE_SLP_STMT (stmt_info
))
784 /* FORNOW: Assuming maximum 2 args per stmts. */
785 for (i
= 0; i
< 2; i
++)
786 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
787 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
788 stmt_info
, 0, vect_prologue
);
790 /* Pass the inside-of-loop statements to the target-specific cost model. */
791 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
792 stmt_info
, 0, vect_body
);
794 if (dump_enabled_p ())
795 dump_printf_loc (MSG_NOTE
, vect_location
,
796 "vect_model_simple_cost: inside_cost = %d, "
797 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
801 /* Model cost for type demotion and promotion operations. PWR is normally
802 zero for single-step promotions and demotions. It will be one if
803 two-step promotion/demotion is required, and so on. Each additional
804 step doubles the number of instructions required. */
807 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
808 enum vect_def_type
*dt
, int pwr
)
811 int inside_cost
= 0, prologue_cost
= 0;
812 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
813 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
814 void *target_cost_data
;
816 /* The SLP costs were already calculated during SLP tree build. */
817 if (PURE_SLP_STMT (stmt_info
))
821 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
823 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
825 for (i
= 0; i
< pwr
+ 1; i
++)
827 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
829 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
830 vec_promote_demote
, stmt_info
, 0,
834 /* FORNOW: Assuming maximum 2 args per stmts. */
835 for (i
= 0; i
< 2; i
++)
836 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
837 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
838 stmt_info
, 0, vect_prologue
);
840 if (dump_enabled_p ())
841 dump_printf_loc (MSG_NOTE
, vect_location
,
842 "vect_model_promotion_demotion_cost: inside_cost = %d, "
843 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
846 /* Function vect_cost_group_size
848 For grouped load or store, return the group_size only if it is the first
849 load or store of a group, else return 1. This ensures that group size is
850 only returned once per group. */
853 vect_cost_group_size (stmt_vec_info stmt_info
)
855 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
857 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
858 return GROUP_SIZE (stmt_info
);
864 /* Function vect_model_store_cost
866 Models cost for stores. In the case of grouped accesses, one access
867 has the overhead of the grouped access attributed to it. */
870 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
871 bool store_lanes_p
, enum vect_def_type dt
,
873 stmt_vector_for_cost
*prologue_cost_vec
,
874 stmt_vector_for_cost
*body_cost_vec
)
877 unsigned int inside_cost
= 0, prologue_cost
= 0;
878 struct data_reference
*first_dr
;
881 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
882 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
883 stmt_info
, 0, vect_prologue
);
885 /* Grouped access? */
886 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
890 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
895 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
896 group_size
= vect_cost_group_size (stmt_info
);
899 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
901 /* Not a grouped access. */
905 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
908 /* We assume that the cost of a single store-lanes instruction is
909 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
910 access is instead being provided by a permute-and-store operation,
911 include the cost of the permutes. */
912 if (!store_lanes_p
&& group_size
> 1
913 && !STMT_VINFO_STRIDED_P (stmt_info
))
915 /* Uses a high and low interleave or shuffle operations for each
917 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
918 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
919 stmt_info
, 0, vect_body
);
921 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE
, vect_location
,
923 "vect_model_store_cost: strided group_size = %d .\n",
927 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
928 /* Costs of the stores. */
929 if (STMT_VINFO_STRIDED_P (stmt_info
)
930 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
932 /* N scalar stores plus extracting the elements. */
933 inside_cost
+= record_stmt_cost (body_cost_vec
,
934 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
935 scalar_store
, stmt_info
, 0, vect_body
);
938 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
940 if (STMT_VINFO_STRIDED_P (stmt_info
))
941 inside_cost
+= record_stmt_cost (body_cost_vec
,
942 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
943 vec_to_scalar
, stmt_info
, 0, vect_body
);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE
, vect_location
,
947 "vect_model_store_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
952 /* Calculate cost of DR's memory access. */
954 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
955 unsigned int *inside_cost
,
956 stmt_vector_for_cost
*body_cost_vec
)
958 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
959 gimple
*stmt
= DR_STMT (dr
);
960 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
962 switch (alignment_support_scheme
)
966 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
967 vector_store
, stmt_info
, 0,
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE
, vect_location
,
972 "vect_model_store_cost: aligned.\n");
976 case dr_unaligned_supported
:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
980 unaligned_store
, stmt_info
,
981 DR_MISALIGNMENT (dr
), vect_body
);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE
, vect_location
,
984 "vect_model_store_cost: unaligned supported by "
989 case dr_unaligned_unsupported
:
991 *inside_cost
= VECT_MAX_COST
;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
995 "vect_model_store_cost: unsupported access.\n");
1005 /* Function vect_model_load_cost
1007 Models cost for loads. In the case of grouped accesses, the last access
1008 has the overhead of the grouped access attributed to it. Since unaligned
1009 accesses are supported for loads, we also account for the costs of the
1010 access scheme chosen. */
1013 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1014 bool load_lanes_p
, slp_tree slp_node
,
1015 stmt_vector_for_cost
*prologue_cost_vec
,
1016 stmt_vector_for_cost
*body_cost_vec
)
1020 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1021 unsigned int inside_cost
= 0, prologue_cost
= 0;
1023 /* Grouped accesses? */
1024 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1025 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1027 group_size
= vect_cost_group_size (stmt_info
);
1028 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1030 /* Not a grouped access. */
1037 /* We assume that the cost of a single load-lanes instruction is
1038 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1039 access is instead being provided by a load-and-permute operation,
1040 include the cost of the permutes. */
1041 if (!load_lanes_p
&& group_size
> 1
1042 && !STMT_VINFO_STRIDED_P (stmt_info
))
1044 /* Uses an even and odd extract operations or shuffle operations
1045 for each needed permute. */
1046 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1047 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1048 stmt_info
, 0, vect_body
);
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE
, vect_location
,
1052 "vect_model_load_cost: strided group_size = %d .\n",
1056 /* The loads themselves. */
1057 if (STMT_VINFO_STRIDED_P (stmt_info
)
1058 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1060 /* N scalar loads plus gathering them into a vector. */
1061 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1062 inside_cost
+= record_stmt_cost (body_cost_vec
,
1063 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1064 scalar_load
, stmt_info
, 0, vect_body
);
1067 vect_get_load_cost (first_dr
, ncopies
,
1068 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1069 || group_size
> 1 || slp_node
),
1070 &inside_cost
, &prologue_cost
,
1071 prologue_cost_vec
, body_cost_vec
, true);
1072 if (STMT_VINFO_STRIDED_P (stmt_info
))
1073 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1074 stmt_info
, 0, vect_body
);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE
, vect_location
,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1083 /* Calculate cost of DR's memory access. */
1085 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1086 bool add_realign_cost
, unsigned int *inside_cost
,
1087 unsigned int *prologue_cost
,
1088 stmt_vector_for_cost
*prologue_cost_vec
,
1089 stmt_vector_for_cost
*body_cost_vec
,
1090 bool record_prologue_costs
)
1092 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1093 gimple
*stmt
= DR_STMT (dr
);
1094 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1096 switch (alignment_support_scheme
)
1100 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1101 stmt_info
, 0, vect_body
);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE
, vect_location
,
1105 "vect_model_load_cost: aligned.\n");
1109 case dr_unaligned_supported
:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1113 unaligned_load
, stmt_info
,
1114 DR_MISALIGNMENT (dr
), vect_body
);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE
, vect_location
,
1118 "vect_model_load_cost: unaligned supported by "
1123 case dr_explicit_realign
:
1125 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1126 vector_load
, stmt_info
, 0, vect_body
);
1127 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1128 vec_perm
, stmt_info
, 0, vect_body
);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1133 if (targetm
.vectorize
.builtin_mask_for_load
)
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1135 stmt_info
, 0, vect_body
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_load_cost: explicit realign\n");
1143 case dr_explicit_realign_optimized
:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_load_cost: unaligned software "
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost
&& record_prologue_costs
)
1159 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1160 vector_stmt
, stmt_info
,
1162 if (targetm
.vectorize
.builtin_mask_for_load
)
1163 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1164 vector_stmt
, stmt_info
,
1168 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1169 stmt_info
, 0, vect_body
);
1170 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1171 stmt_info
, 0, vect_body
);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE
, vect_location
,
1175 "vect_model_load_cost: explicit realign optimized"
1181 case dr_unaligned_unsupported
:
1183 *inside_cost
= VECT_MAX_COST
;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1187 "vect_model_load_cost: unsupported access.\n");
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1200 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1203 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1206 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1207 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1211 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1215 if (nested_in_vect_loop_p (loop
, stmt
))
1218 pe
= loop_preheader_edge (loop
);
1219 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1220 gcc_assert (!new_bb
);
1224 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1226 gimple_stmt_iterator gsi_bb_start
;
1228 gcc_assert (bb_vinfo
);
1229 bb
= BB_VINFO_BB (bb_vinfo
);
1230 gsi_bb_start
= gsi_after_labels (bb
);
1231 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE
, vect_location
,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 vector type a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1254 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1259 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1262 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1263 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1265 /* Scalar boolean value should be transformed into
1266 all zeros or all ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type
))
1269 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1270 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1272 if (CONSTANT_CLASS_P (val
))
1273 val
= integer_zerop (val
) ? false_val
: true_val
;
1276 new_temp
= make_ssa_name (TREE_TYPE (type
));
1277 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1278 val
, true_val
, false_val
);
1279 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1283 else if (CONSTANT_CLASS_P (val
))
1284 val
= fold_convert (TREE_TYPE (type
), val
);
1287 new_temp
= make_ssa_name (TREE_TYPE (type
));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1289 init_stmt
= gimple_build_assign (new_temp
,
1290 fold_build1 (VIEW_CONVERT_EXPR
,
1294 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1295 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1299 val
= build_vector_from_val (type
, val
);
1302 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1303 init_stmt
= gimple_build_assign (new_temp
, val
);
1304 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1309 /* Function vect_get_vec_def_for_operand.
1311 OP is an operand in STMT. This function returns a (vector) def that will be
1312 used in the vectorized stmt for STMT.
1314 In the case that OP is an SSA_NAME which is defined in the loop, then
1315 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1317 In case OP is an invariant or constant, a new stmt that creates a vector def
1318 needs to be introduced. VECTYPE may be used to specify a required type for
1319 vector invariant. */
1322 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1327 stmt_vec_info def_stmt_info
= NULL
;
1328 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1329 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1330 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1331 enum vect_def_type dt
;
1335 if (dump_enabled_p ())
1337 dump_printf_loc (MSG_NOTE
, vect_location
,
1338 "vect_get_vec_def_for_operand: ");
1339 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1340 dump_printf (MSG_NOTE
, "\n");
1343 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1344 gcc_assert (is_simple_use
);
1345 if (dump_enabled_p ())
1347 int loc_printed
= 0;
1351 dump_printf (MSG_NOTE
, " def_stmt = ");
1353 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1354 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1360 /* operand is a constant or a loop invariant. */
1361 case vect_constant_def
:
1362 case vect_external_def
:
1365 vector_type
= vectype
;
1366 else if (TREE_CODE (TREE_TYPE (op
)) == BOOLEAN_TYPE
1367 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1368 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1370 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1372 gcc_assert (vector_type
);
1373 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1376 /* operand is defined inside the loop. */
1377 case vect_internal_def
:
1379 /* Get the def from the vectorized stmt. */
1380 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1382 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1383 /* Get vectorized pattern statement. */
1385 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1386 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1387 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1388 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1389 gcc_assert (vec_stmt
);
1390 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1391 vec_oprnd
= PHI_RESULT (vec_stmt
);
1392 else if (is_gimple_call (vec_stmt
))
1393 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1395 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1399 /* operand is defined by a loop header phi - reduction */
1400 case vect_reduction_def
:
1401 case vect_double_reduction_def
:
1402 case vect_nested_cycle
:
1403 /* Code should use get_initial_def_for_reduction. */
1406 /* operand is defined by loop-header phi - induction. */
1407 case vect_induction_def
:
1409 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1411 /* Get the def from the vectorized stmt. */
1412 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1413 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1414 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1415 vec_oprnd
= PHI_RESULT (vec_stmt
);
1417 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1427 /* Function vect_get_vec_def_for_stmt_copy
1429 Return a vector-def for an operand. This function is used when the
1430 vectorized stmt to be created (by the caller to this function) is a "copy"
1431 created in case the vectorized result cannot fit in one vector, and several
1432 copies of the vector-stmt are required. In this case the vector-def is
1433 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1434 of the stmt that defines VEC_OPRND.
1435 DT is the type of the vector def VEC_OPRND.
1438 In case the vectorization factor (VF) is bigger than the number
1439 of elements that can fit in a vectype (nunits), we have to generate
1440 more than one vector stmt to vectorize the scalar stmt. This situation
1441 arises when there are multiple data-types operated upon in the loop; the
1442 smallest data-type determines the VF, and as a result, when vectorizing
1443 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1444 vector stmt (each computing a vector of 'nunits' results, and together
1445 computing 'VF' results in each iteration). This function is called when
1446 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1447 which VF=16 and nunits=4, so the number of copies required is 4):
1449 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1451 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1452 VS1.1: vx.1 = memref1 VS1.2
1453 VS1.2: vx.2 = memref2 VS1.3
1454 VS1.3: vx.3 = memref3
1456 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1457 VSnew.1: vz1 = vx.1 + ... VSnew.2
1458 VSnew.2: vz2 = vx.2 + ... VSnew.3
1459 VSnew.3: vz3 = vx.3 + ...
1461 The vectorization of S1 is explained in vectorizable_load.
1462 The vectorization of S2:
1463 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1464 the function 'vect_get_vec_def_for_operand' is called to
1465 get the relevant vector-def for each operand of S2. For operand x it
1466 returns the vector-def 'vx.0'.
1468 To create the remaining copies of the vector-stmt (VSnew.j), this
1469 function is called to get the relevant vector-def for each operand. It is
1470 obtained from the respective VS1.j stmt, which is recorded in the
1471 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1473 For example, to obtain the vector-def 'vx.1' in order to create the
1474 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1475 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1476 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1477 and return its def ('vx.1').
1478 Overall, to create the above sequence this function will be called 3 times:
1479 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1480 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1481 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1484 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1486 gimple
*vec_stmt_for_operand
;
1487 stmt_vec_info def_stmt_info
;
1489 /* Do nothing; can reuse same def. */
1490 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1493 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1494 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1495 gcc_assert (def_stmt_info
);
1496 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1497 gcc_assert (vec_stmt_for_operand
);
1498 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1499 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1501 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1506 /* Get vectorized definitions for the operands to create a copy of an original
1507 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1510 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1511 vec
<tree
> *vec_oprnds0
,
1512 vec
<tree
> *vec_oprnds1
)
1514 tree vec_oprnd
= vec_oprnds0
->pop ();
1516 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1517 vec_oprnds0
->quick_push (vec_oprnd
);
1519 if (vec_oprnds1
&& vec_oprnds1
->length ())
1521 vec_oprnd
= vec_oprnds1
->pop ();
1522 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1523 vec_oprnds1
->quick_push (vec_oprnd
);
1528 /* Get vectorized definitions for OP0 and OP1.
1529 REDUC_INDEX is the index of reduction operand in case of reduction,
1530 and -1 otherwise. */
1533 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1534 vec
<tree
> *vec_oprnds0
,
1535 vec
<tree
> *vec_oprnds1
,
1536 slp_tree slp_node
, int reduc_index
)
1540 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1541 auto_vec
<tree
> ops (nops
);
1542 auto_vec
<vec
<tree
> > vec_defs (nops
);
1544 ops
.quick_push (op0
);
1546 ops
.quick_push (op1
);
1548 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1550 *vec_oprnds0
= vec_defs
[0];
1552 *vec_oprnds1
= vec_defs
[1];
1558 vec_oprnds0
->create (1);
1559 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1560 vec_oprnds0
->quick_push (vec_oprnd
);
1564 vec_oprnds1
->create (1);
1565 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1566 vec_oprnds1
->quick_push (vec_oprnd
);
1572 /* Function vect_finish_stmt_generation.
1574 Insert a new stmt. */
1577 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1578 gimple_stmt_iterator
*gsi
)
1580 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1581 vec_info
*vinfo
= stmt_info
->vinfo
;
1583 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1585 if (!gsi_end_p (*gsi
)
1586 && gimple_has_mem_ops (vec_stmt
))
1588 gimple
*at_stmt
= gsi_stmt (*gsi
);
1589 tree vuse
= gimple_vuse (at_stmt
);
1590 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1592 tree vdef
= gimple_vdef (at_stmt
);
1593 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1594 /* If we have an SSA vuse and insert a store, update virtual
1595 SSA form to avoid triggering the renamer. Do so only
1596 if we can easily see all uses - which is what almost always
1597 happens with the way vectorized stmts are inserted. */
1598 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1599 && ((is_gimple_assign (vec_stmt
)
1600 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1601 || (is_gimple_call (vec_stmt
)
1602 && !(gimple_call_flags (vec_stmt
)
1603 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1605 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1606 gimple_set_vdef (vec_stmt
, new_vdef
);
1607 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1611 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1613 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1615 if (dump_enabled_p ())
1617 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1618 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1621 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1623 /* While EH edges will generally prevent vectorization, stmt might
1624 e.g. be in a must-not-throw region. Ensure newly created stmts
1625 that could throw are part of the same region. */
1626 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1627 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1628 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1631 /* We want to vectorize a call to combined function CFN with function
1632 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1633 as the types of all inputs. Check whether this is possible using
1634 an internal function, returning its code if so or IFN_LAST if not. */
1637 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1638 tree vectype_out
, tree vectype_in
)
1641 if (internal_fn_p (cfn
))
1642 ifn
= as_internal_fn (cfn
);
1644 ifn
= associated_internal_fn (fndecl
);
1645 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1647 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1648 if (info
.vectorizable
)
1650 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1651 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1652 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1653 OPTIMIZE_FOR_SPEED
))
1661 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1662 gimple_stmt_iterator
*);
1665 /* Function vectorizable_mask_load_store.
1667 Check if STMT performs a conditional load or store that can be vectorized.
1668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1669 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1670 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1673 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1674 gimple
**vec_stmt
, slp_tree slp_node
)
1676 tree vec_dest
= NULL
;
1677 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1678 stmt_vec_info prev_stmt_info
;
1679 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1680 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1681 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1682 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1683 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1684 tree rhs_vectype
= NULL_TREE
;
1689 tree dataref_ptr
= NULL_TREE
;
1691 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1695 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1696 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1697 int gather_scale
= 1;
1698 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1702 enum vect_def_type dt
;
1704 if (slp_node
!= NULL
)
1707 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1708 gcc_assert (ncopies
>= 1);
1710 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1711 mask
= gimple_call_arg (stmt
, 2);
1713 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
1716 /* FORNOW. This restriction should be relaxed. */
1717 if (nested_in_vect_loop
&& ncopies
> 1)
1719 if (dump_enabled_p ())
1720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1721 "multiple types in nested loop.");
1725 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1728 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
1732 if (!STMT_VINFO_DATA_REF (stmt_info
))
1735 elem_type
= TREE_TYPE (vectype
);
1737 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1740 if (STMT_VINFO_STRIDED_P (stmt_info
))
1743 if (TREE_CODE (mask
) != SSA_NAME
)
1746 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
1750 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
1752 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
1753 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
1758 tree rhs
= gimple_call_arg (stmt
, 3);
1759 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
1763 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1766 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
1767 &gather_off
, &gather_scale
);
1768 gcc_assert (gather_decl
);
1769 if (!vect_is_simple_use (gather_off
, loop_vinfo
, &def_stmt
, &gather_dt
,
1770 &gather_off_vectype
))
1772 if (dump_enabled_p ())
1773 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1774 "gather index use not simple.");
1778 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1780 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1781 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1785 "masked gather with integer mask not supported.");
1789 else if (tree_int_cst_compare (nested_in_vect_loop
1790 ? STMT_VINFO_DR_STEP (stmt_info
)
1791 : DR_STEP (dr
), size_zero_node
) <= 0)
1793 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1794 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
1795 TYPE_MODE (mask_vectype
),
1798 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
1801 if (!vec_stmt
) /* transformation not required. */
1803 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1805 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1808 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1814 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1816 tree vec_oprnd0
= NULL_TREE
, op
;
1817 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1818 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1819 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1820 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1821 tree mask_perm_mask
= NULL_TREE
;
1822 edge pe
= loop_preheader_edge (loop
);
1825 enum { NARROW
, NONE
, WIDEN
} modifier
;
1826 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1828 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1829 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1830 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1831 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1832 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1833 scaletype
= TREE_VALUE (arglist
);
1834 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1835 && types_compatible_p (srctype
, masktype
));
1837 if (nunits
== gather_off_nunits
)
1839 else if (nunits
== gather_off_nunits
/ 2)
1841 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1844 for (i
= 0; i
< gather_off_nunits
; ++i
)
1845 sel
[i
] = i
| nunits
;
1847 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1849 else if (nunits
== gather_off_nunits
* 2)
1851 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1854 for (i
= 0; i
< nunits
; ++i
)
1855 sel
[i
] = i
< gather_off_nunits
1856 ? i
: i
+ nunits
- gather_off_nunits
;
1858 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1860 for (i
= 0; i
< nunits
; ++i
)
1861 sel
[i
] = i
| gather_off_nunits
;
1862 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1867 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1869 ptr
= fold_convert (ptrtype
, gather_base
);
1870 if (!is_gimple_min_invariant (ptr
))
1872 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1873 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1874 gcc_assert (!new_bb
);
1877 scale
= build_int_cst (scaletype
, gather_scale
);
1879 prev_stmt_info
= NULL
;
1880 for (j
= 0; j
< ncopies
; ++j
)
1882 if (modifier
== WIDEN
&& (j
& 1))
1883 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1884 perm_mask
, stmt
, gsi
);
1887 = vect_get_vec_def_for_operand (gather_off
, stmt
);
1890 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1892 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1894 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1895 == TYPE_VECTOR_SUBPARTS (idxtype
));
1896 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
1897 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1899 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1900 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1904 if (mask_perm_mask
&& (j
& 1))
1905 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1906 mask_perm_mask
, stmt
, gsi
);
1910 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
1913 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
1914 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1918 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1920 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1921 == TYPE_VECTOR_SUBPARTS (masktype
));
1922 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
1923 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1925 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
1926 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1932 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
1935 if (!useless_type_conversion_p (vectype
, rettype
))
1937 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1938 == TYPE_VECTOR_SUBPARTS (rettype
));
1939 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
1940 gimple_call_set_lhs (new_stmt
, op
);
1941 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1942 var
= make_ssa_name (vec_dest
);
1943 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
1944 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1948 var
= make_ssa_name (vec_dest
, new_stmt
);
1949 gimple_call_set_lhs (new_stmt
, var
);
1952 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1954 if (modifier
== NARROW
)
1961 var
= permute_vec_elements (prev_res
, var
,
1962 perm_mask
, stmt
, gsi
);
1963 new_stmt
= SSA_NAME_DEF_STMT (var
);
1966 if (prev_stmt_info
== NULL
)
1967 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1969 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1970 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1973 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
1975 if (STMT_VINFO_RELATED_STMT (stmt_info
))
1977 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1978 stmt_info
= vinfo_for_stmt (stmt
);
1980 tree lhs
= gimple_call_lhs (stmt
);
1981 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
1982 set_vinfo_for_stmt (new_stmt
, stmt_info
);
1983 set_vinfo_for_stmt (stmt
, NULL
);
1984 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
1985 gsi_replace (gsi
, new_stmt
, true);
1990 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
1991 prev_stmt_info
= NULL
;
1992 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
1993 for (i
= 0; i
< ncopies
; i
++)
1995 unsigned align
, misalign
;
1999 tree rhs
= gimple_call_arg (stmt
, 3);
2000 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2001 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2002 /* We should have catched mismatched types earlier. */
2003 gcc_assert (useless_type_conversion_p (vectype
,
2004 TREE_TYPE (vec_rhs
)));
2005 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2006 NULL_TREE
, &dummy
, gsi
,
2007 &ptr_incr
, false, &inv_p
);
2008 gcc_assert (!inv_p
);
2012 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2013 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2014 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2015 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2016 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2017 TYPE_SIZE_UNIT (vectype
));
2020 align
= TYPE_ALIGN_UNIT (vectype
);
2021 if (aligned_access_p (dr
))
2023 else if (DR_MISALIGNMENT (dr
) == -1)
2025 align
= TYPE_ALIGN_UNIT (elem_type
);
2029 misalign
= DR_MISALIGNMENT (dr
);
2030 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2032 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2033 misalign
? misalign
& -misalign
: align
);
2035 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2036 ptr
, vec_mask
, vec_rhs
);
2037 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2039 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2041 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2042 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2047 tree vec_mask
= NULL_TREE
;
2048 prev_stmt_info
= NULL
;
2049 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2050 for (i
= 0; i
< ncopies
; i
++)
2052 unsigned align
, misalign
;
2056 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2057 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2058 NULL_TREE
, &dummy
, gsi
,
2059 &ptr_incr
, false, &inv_p
);
2060 gcc_assert (!inv_p
);
2064 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2065 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2066 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2067 TYPE_SIZE_UNIT (vectype
));
2070 align
= TYPE_ALIGN_UNIT (vectype
);
2071 if (aligned_access_p (dr
))
2073 else if (DR_MISALIGNMENT (dr
) == -1)
2075 align
= TYPE_ALIGN_UNIT (elem_type
);
2079 misalign
= DR_MISALIGNMENT (dr
);
2080 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2082 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2083 misalign
? misalign
& -misalign
: align
);
2085 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2087 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2088 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2090 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2092 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2093 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2099 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2101 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2103 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2104 stmt_info
= vinfo_for_stmt (stmt
);
2106 tree lhs
= gimple_call_lhs (stmt
);
2107 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2108 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2109 set_vinfo_for_stmt (stmt
, NULL
);
2110 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2111 gsi_replace (gsi
, new_stmt
, true);
2117 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2118 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2119 in a single step. On success, store the binary pack code in
2123 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2124 tree_code
*convert_code
)
2126 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2127 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2131 int multi_step_cvt
= 0;
2132 auto_vec
<tree
, 8> interm_types
;
2133 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2134 &code
, &multi_step_cvt
,
2139 *convert_code
= code
;
2143 /* Function vectorizable_call.
2145 Check if GS performs a function call that can be vectorized.
2146 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2147 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2148 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2151 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2158 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2159 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2160 tree vectype_out
, vectype_in
;
2163 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2164 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2165 vec_info
*vinfo
= stmt_info
->vinfo
;
2166 tree fndecl
, new_temp
, rhs_type
;
2168 enum vect_def_type dt
[3]
2169 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2170 gimple
*new_stmt
= NULL
;
2172 vec
<tree
> vargs
= vNULL
;
2173 enum { NARROW
, NONE
, WIDEN
} modifier
;
2177 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2180 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2184 /* Is GS a vectorizable call? */
2185 stmt
= dyn_cast
<gcall
*> (gs
);
2189 if (gimple_call_internal_p (stmt
)
2190 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2191 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2192 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2195 if (gimple_call_lhs (stmt
) == NULL_TREE
2196 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2199 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2201 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2203 /* Process function arguments. */
2204 rhs_type
= NULL_TREE
;
2205 vectype_in
= NULL_TREE
;
2206 nargs
= gimple_call_num_args (stmt
);
2208 /* Bail out if the function has more than three arguments, we do not have
2209 interesting builtin functions to vectorize with more than two arguments
2210 except for fma. No arguments is also not good. */
2211 if (nargs
== 0 || nargs
> 3)
2214 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2215 if (gimple_call_internal_p (stmt
)
2216 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2219 rhs_type
= unsigned_type_node
;
2222 for (i
= 0; i
< nargs
; i
++)
2226 op
= gimple_call_arg (stmt
, i
);
2228 /* We can only handle calls with arguments of the same type. */
2230 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2232 if (dump_enabled_p ())
2233 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2234 "argument types differ.\n");
2238 rhs_type
= TREE_TYPE (op
);
2240 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2244 "use not simple.\n");
2249 vectype_in
= opvectype
;
2251 && opvectype
!= vectype_in
)
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2255 "argument vector types differ.\n");
2259 /* If all arguments are external or constant defs use a vector type with
2260 the same size as the output vector type. */
2262 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2264 gcc_assert (vectype_in
);
2267 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2270 "no vectype for scalar type ");
2271 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2272 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2279 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2280 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2281 if (nunits_in
== nunits_out
/ 2)
2283 else if (nunits_out
== nunits_in
)
2285 else if (nunits_out
== nunits_in
/ 2)
2290 /* We only handle functions that do not read or clobber memory. */
2291 if (gimple_vuse (stmt
))
2293 if (dump_enabled_p ())
2294 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2295 "function reads from or writes to memory.\n");
2299 /* For now, we only vectorize functions if a target specific builtin
2300 is available. TODO -- in some cases, it might be profitable to
2301 insert the calls for pieces of the vector, in order to be able
2302 to vectorize other operations in the loop. */
2304 internal_fn ifn
= IFN_LAST
;
2305 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2306 tree callee
= gimple_call_fndecl (stmt
);
2308 /* First try using an internal function. */
2309 tree_code convert_code
= ERROR_MARK
;
2311 && (modifier
== NONE
2312 || (modifier
== NARROW
2313 && simple_integer_narrowing (vectype_out
, vectype_in
,
2315 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2318 /* If that fails, try asking for a target-specific built-in function. */
2319 if (ifn
== IFN_LAST
)
2321 if (cfn
!= CFN_LAST
)
2322 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2323 (cfn
, vectype_out
, vectype_in
);
2325 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2326 (callee
, vectype_out
, vectype_in
);
2329 if (ifn
== IFN_LAST
&& !fndecl
)
2331 if (cfn
== CFN_GOMP_SIMD_LANE
2334 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2335 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2336 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2337 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2339 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2340 { 0, 1, 2, ... vf - 1 } vector. */
2341 gcc_assert (nargs
== 0);
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2347 "function is not vectorizable.\n");
2354 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2355 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2357 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2359 /* Sanity check: make sure that at least one copy of the vectorized stmt
2360 needs to be generated. */
2361 gcc_assert (ncopies
>= 1);
2363 if (!vec_stmt
) /* transformation not required. */
2365 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2369 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2370 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2371 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2372 vec_promote_demote
, stmt_info
, 0, vect_body
);
2379 if (dump_enabled_p ())
2380 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2383 scalar_dest
= gimple_call_lhs (stmt
);
2384 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2386 prev_stmt_info
= NULL
;
2387 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2389 tree prev_res
= NULL_TREE
;
2390 for (j
= 0; j
< ncopies
; ++j
)
2392 /* Build argument list for the vectorized call. */
2394 vargs
.create (nargs
);
2400 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2401 vec
<tree
> vec_oprnds0
;
2403 for (i
= 0; i
< nargs
; i
++)
2404 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2405 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2406 vec_oprnds0
= vec_defs
[0];
2408 /* Arguments are ready. Create the new vector stmt. */
2409 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2412 for (k
= 0; k
< nargs
; k
++)
2414 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2415 vargs
[k
] = vec_oprndsk
[i
];
2417 if (modifier
== NARROW
)
2419 tree half_res
= make_ssa_name (vectype_in
);
2420 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2421 gimple_call_set_lhs (new_stmt
, half_res
);
2422 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2425 prev_res
= half_res
;
2428 new_temp
= make_ssa_name (vec_dest
);
2429 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2430 prev_res
, half_res
);
2434 if (ifn
!= IFN_LAST
)
2435 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2437 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2438 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2439 gimple_call_set_lhs (new_stmt
, new_temp
);
2441 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2442 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2445 for (i
= 0; i
< nargs
; i
++)
2447 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2448 vec_oprndsi
.release ();
2453 for (i
= 0; i
< nargs
; i
++)
2455 op
= gimple_call_arg (stmt
, i
);
2458 = vect_get_vec_def_for_operand (op
, stmt
);
2461 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2463 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2466 vargs
.quick_push (vec_oprnd0
);
2469 if (gimple_call_internal_p (stmt
)
2470 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2472 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2474 for (k
= 0; k
< nunits_out
; ++k
)
2475 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2476 tree cst
= build_vector (vectype_out
, v
);
2478 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2479 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2480 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2481 new_temp
= make_ssa_name (vec_dest
);
2482 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2484 else if (modifier
== NARROW
)
2486 tree half_res
= make_ssa_name (vectype_in
);
2487 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2488 gimple_call_set_lhs (new_stmt
, half_res
);
2489 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2492 prev_res
= half_res
;
2495 new_temp
= make_ssa_name (vec_dest
);
2496 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2497 prev_res
, half_res
);
2501 if (ifn
!= IFN_LAST
)
2502 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2504 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2505 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2506 gimple_call_set_lhs (new_stmt
, new_temp
);
2508 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2510 if (j
== (modifier
== NARROW
? 1 : 0))
2511 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2513 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2515 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2518 else if (modifier
== NARROW
)
2520 for (j
= 0; j
< ncopies
; ++j
)
2522 /* Build argument list for the vectorized call. */
2524 vargs
.create (nargs
* 2);
2530 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2531 vec
<tree
> vec_oprnds0
;
2533 for (i
= 0; i
< nargs
; i
++)
2534 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2535 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2536 vec_oprnds0
= vec_defs
[0];
2538 /* Arguments are ready. Create the new vector stmt. */
2539 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2543 for (k
= 0; k
< nargs
; k
++)
2545 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2546 vargs
.quick_push (vec_oprndsk
[i
]);
2547 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2549 if (ifn
!= IFN_LAST
)
2550 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2552 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2553 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2554 gimple_call_set_lhs (new_stmt
, new_temp
);
2555 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2556 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2559 for (i
= 0; i
< nargs
; i
++)
2561 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2562 vec_oprndsi
.release ();
2567 for (i
= 0; i
< nargs
; i
++)
2569 op
= gimple_call_arg (stmt
, i
);
2573 = vect_get_vec_def_for_operand (op
, stmt
);
2575 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2579 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2581 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2583 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2586 vargs
.quick_push (vec_oprnd0
);
2587 vargs
.quick_push (vec_oprnd1
);
2590 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2591 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2592 gimple_call_set_lhs (new_stmt
, new_temp
);
2593 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2596 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2598 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2600 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2603 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2606 /* No current target implements this case. */
2611 /* The call in STMT might prevent it from being removed in dce.
2612 We however cannot remove it here, due to the way the ssa name
2613 it defines is mapped to the new definition. So just replace
2614 rhs of the statement with something harmless. */
2619 type
= TREE_TYPE (scalar_dest
);
2620 if (is_pattern_stmt_p (stmt_info
))
2621 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2623 lhs
= gimple_call_lhs (stmt
);
2625 if (gimple_call_internal_p (stmt
)
2626 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2628 /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
2629 with vf - 1 rather than 0, that is the last iteration of the
2631 imm_use_iterator iter
;
2632 use_operand_p use_p
;
2634 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2636 basic_block use_bb
= gimple_bb (use_stmt
);
2638 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), use_bb
))
2640 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2641 SET_USE (use_p
, build_int_cst (TREE_TYPE (lhs
),
2642 ncopies
* nunits_out
- 1));
2643 update_stmt (use_stmt
);
2648 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2649 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2650 set_vinfo_for_stmt (stmt
, NULL
);
2651 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2652 gsi_replace (gsi
, new_stmt
, false);
2658 struct simd_call_arg_info
2662 enum vect_def_type dt
;
2663 HOST_WIDE_INT linear_step
;
2665 bool simd_lane_linear
;
2668 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2669 is linear within simd lane (but not within whole loop), note it in
2673 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2674 struct simd_call_arg_info
*arginfo
)
2676 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2678 if (!is_gimple_assign (def_stmt
)
2679 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2680 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2683 tree base
= gimple_assign_rhs1 (def_stmt
);
2684 HOST_WIDE_INT linear_step
= 0;
2685 tree v
= gimple_assign_rhs2 (def_stmt
);
2686 while (TREE_CODE (v
) == SSA_NAME
)
2689 def_stmt
= SSA_NAME_DEF_STMT (v
);
2690 if (is_gimple_assign (def_stmt
))
2691 switch (gimple_assign_rhs_code (def_stmt
))
2694 t
= gimple_assign_rhs2 (def_stmt
);
2695 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2697 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2698 v
= gimple_assign_rhs1 (def_stmt
);
2701 t
= gimple_assign_rhs2 (def_stmt
);
2702 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2704 linear_step
= tree_to_shwi (t
);
2705 v
= gimple_assign_rhs1 (def_stmt
);
2708 t
= gimple_assign_rhs1 (def_stmt
);
2709 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2710 || (TYPE_PRECISION (TREE_TYPE (v
))
2711 < TYPE_PRECISION (TREE_TYPE (t
))))
2720 else if (is_gimple_call (def_stmt
)
2721 && gimple_call_internal_p (def_stmt
)
2722 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
2724 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
2725 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
2730 arginfo
->linear_step
= linear_step
;
2732 arginfo
->simd_lane_linear
= true;
2738 /* Function vectorizable_simd_clone_call.
2740 Check if STMT performs a function call that can be vectorized
2741 by calling a simd clone of the function.
2742 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2743 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2744 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2747 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2748 gimple
**vec_stmt
, slp_tree slp_node
)
2753 tree vec_oprnd0
= NULL_TREE
;
2754 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2756 unsigned int nunits
;
2757 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2758 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2759 vec_info
*vinfo
= stmt_info
->vinfo
;
2760 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2761 tree fndecl
, new_temp
;
2763 gimple
*new_stmt
= NULL
;
2765 auto_vec
<simd_call_arg_info
> arginfo
;
2766 vec
<tree
> vargs
= vNULL
;
2768 tree lhs
, rtype
, ratype
;
2769 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2771 /* Is STMT a vectorizable call? */
2772 if (!is_gimple_call (stmt
))
2775 fndecl
= gimple_call_fndecl (stmt
);
2776 if (fndecl
== NULL_TREE
)
2779 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2780 if (node
== NULL
|| node
->simd_clones
== NULL
)
2783 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2786 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2790 if (gimple_call_lhs (stmt
)
2791 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2794 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2796 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2798 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2805 /* Process function arguments. */
2806 nargs
= gimple_call_num_args (stmt
);
2808 /* Bail out if the function has zero arguments. */
2812 arginfo
.reserve (nargs
, true);
2814 for (i
= 0; i
< nargs
; i
++)
2816 simd_call_arg_info thisarginfo
;
2819 thisarginfo
.linear_step
= 0;
2820 thisarginfo
.align
= 0;
2821 thisarginfo
.op
= NULL_TREE
;
2822 thisarginfo
.simd_lane_linear
= false;
2824 op
= gimple_call_arg (stmt
, i
);
2825 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
2826 &thisarginfo
.vectype
)
2827 || thisarginfo
.dt
== vect_uninitialized_def
)
2829 if (dump_enabled_p ())
2830 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2831 "use not simple.\n");
2835 if (thisarginfo
.dt
== vect_constant_def
2836 || thisarginfo
.dt
== vect_external_def
)
2837 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2839 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2841 /* For linear arguments, the analyze phase should have saved
2842 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2843 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2844 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
2846 gcc_assert (vec_stmt
);
2847 thisarginfo
.linear_step
2848 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
2850 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
2851 thisarginfo
.simd_lane_linear
2852 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
2853 == boolean_true_node
);
2854 /* If loop has been peeled for alignment, we need to adjust it. */
2855 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2856 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2857 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
2859 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2860 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
2861 tree opt
= TREE_TYPE (thisarginfo
.op
);
2862 bias
= fold_convert (TREE_TYPE (step
), bias
);
2863 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2865 = fold_build2 (POINTER_TYPE_P (opt
)
2866 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2867 thisarginfo
.op
, bias
);
2871 && thisarginfo
.dt
!= vect_constant_def
2872 && thisarginfo
.dt
!= vect_external_def
2874 && TREE_CODE (op
) == SSA_NAME
2875 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2877 && tree_fits_shwi_p (iv
.step
))
2879 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2880 thisarginfo
.op
= iv
.base
;
2882 else if ((thisarginfo
.dt
== vect_constant_def
2883 || thisarginfo
.dt
== vect_external_def
)
2884 && POINTER_TYPE_P (TREE_TYPE (op
)))
2885 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2886 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2888 if (POINTER_TYPE_P (TREE_TYPE (op
))
2889 && !thisarginfo
.linear_step
2891 && thisarginfo
.dt
!= vect_constant_def
2892 && thisarginfo
.dt
!= vect_external_def
2895 && TREE_CODE (op
) == SSA_NAME
)
2896 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
2898 arginfo
.quick_push (thisarginfo
);
2901 unsigned int badness
= 0;
2902 struct cgraph_node
*bestn
= NULL
;
2903 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2904 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2906 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2907 n
= n
->simdclone
->next_clone
)
2909 unsigned int this_badness
= 0;
2910 if (n
->simdclone
->simdlen
2911 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2912 || n
->simdclone
->nargs
!= nargs
)
2914 if (n
->simdclone
->simdlen
2915 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2916 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2917 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2918 if (n
->simdclone
->inbranch
)
2919 this_badness
+= 2048;
2920 int target_badness
= targetm
.simd_clone
.usable (n
);
2921 if (target_badness
< 0)
2923 this_badness
+= target_badness
* 512;
2924 /* FORNOW: Have to add code to add the mask argument. */
2925 if (n
->simdclone
->inbranch
)
2927 for (i
= 0; i
< nargs
; i
++)
2929 switch (n
->simdclone
->args
[i
].arg_type
)
2931 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2932 if (!useless_type_conversion_p
2933 (n
->simdclone
->args
[i
].orig_type
,
2934 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2936 else if (arginfo
[i
].dt
== vect_constant_def
2937 || arginfo
[i
].dt
== vect_external_def
2938 || arginfo
[i
].linear_step
)
2941 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2942 if (arginfo
[i
].dt
!= vect_constant_def
2943 && arginfo
[i
].dt
!= vect_external_def
)
2946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
2948 if (arginfo
[i
].dt
== vect_constant_def
2949 || arginfo
[i
].dt
== vect_external_def
2950 || (arginfo
[i
].linear_step
2951 != n
->simdclone
->args
[i
].linear_step
))
2954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
2957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
2958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
2959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
2963 case SIMD_CLONE_ARG_TYPE_MASK
:
2966 if (i
== (size_t) -1)
2968 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2973 if (arginfo
[i
].align
)
2974 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2975 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2977 if (i
== (size_t) -1)
2979 if (bestn
== NULL
|| this_badness
< badness
)
2982 badness
= this_badness
;
2989 for (i
= 0; i
< nargs
; i
++)
2990 if ((arginfo
[i
].dt
== vect_constant_def
2991 || arginfo
[i
].dt
== vect_external_def
)
2992 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2995 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2997 if (arginfo
[i
].vectype
== NULL
2998 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2999 > bestn
->simdclone
->simdlen
))
3003 fndecl
= bestn
->decl
;
3004 nunits
= bestn
->simdclone
->simdlen
;
3005 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3007 /* If the function isn't const, only allow it in simd loops where user
3008 has asserted that at least nunits consecutive iterations can be
3009 performed using SIMD instructions. */
3010 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3011 && gimple_vuse (stmt
))
3014 /* Sanity check: make sure that at least one copy of the vectorized stmt
3015 needs to be generated. */
3016 gcc_assert (ncopies
>= 1);
3018 if (!vec_stmt
) /* transformation not required. */
3020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3021 for (i
= 0; i
< nargs
; i
++)
3022 if ((bestn
->simdclone
->args
[i
].arg_type
3023 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3024 || (bestn
->simdclone
->args
[i
].arg_type
3025 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3030 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3031 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3032 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3033 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3034 tree sll
= arginfo
[i
].simd_lane_linear
3035 ? boolean_true_node
: boolean_false_node
;
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3038 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3039 if (dump_enabled_p ())
3040 dump_printf_loc (MSG_NOTE
, vect_location
,
3041 "=== vectorizable_simd_clone_call ===\n");
3042 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3052 scalar_dest
= gimple_call_lhs (stmt
);
3053 vec_dest
= NULL_TREE
;
3058 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3059 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3060 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3063 rtype
= TREE_TYPE (ratype
);
3067 prev_stmt_info
= NULL
;
3068 for (j
= 0; j
< ncopies
; ++j
)
3070 /* Build argument list for the vectorized call. */
3072 vargs
.create (nargs
);
3076 for (i
= 0; i
< nargs
; i
++)
3078 unsigned int k
, l
, m
, o
;
3080 op
= gimple_call_arg (stmt
, i
);
3081 switch (bestn
->simdclone
->args
[i
].arg_type
)
3083 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3084 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3085 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3086 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3088 if (TYPE_VECTOR_SUBPARTS (atype
)
3089 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3091 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3092 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3093 / TYPE_VECTOR_SUBPARTS (atype
));
3094 gcc_assert ((k
& (k
- 1)) == 0);
3097 = vect_get_vec_def_for_operand (op
, stmt
);
3100 vec_oprnd0
= arginfo
[i
].op
;
3101 if ((m
& (k
- 1)) == 0)
3103 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3106 arginfo
[i
].op
= vec_oprnd0
;
3108 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3110 bitsize_int ((m
& (k
- 1)) * prec
));
3112 = gimple_build_assign (make_ssa_name (atype
),
3114 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3115 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3119 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3120 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3121 gcc_assert ((k
& (k
- 1)) == 0);
3122 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3124 vec_alloc (ctor_elts
, k
);
3127 for (l
= 0; l
< k
; l
++)
3129 if (m
== 0 && l
== 0)
3131 = vect_get_vec_def_for_operand (op
, stmt
);
3134 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3136 arginfo
[i
].op
= vec_oprnd0
;
3139 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3143 vargs
.safe_push (vec_oprnd0
);
3146 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3148 = gimple_build_assign (make_ssa_name (atype
),
3150 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3151 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3156 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3157 vargs
.safe_push (op
);
3159 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3160 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3165 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3170 edge pe
= loop_preheader_edge (loop
);
3171 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3172 gcc_assert (!new_bb
);
3174 if (arginfo
[i
].simd_lane_linear
)
3176 vargs
.safe_push (arginfo
[i
].op
);
3179 tree phi_res
= copy_ssa_name (op
);
3180 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3181 set_vinfo_for_stmt (new_phi
,
3182 new_stmt_vec_info (new_phi
, loop_vinfo
));
3183 add_phi_arg (new_phi
, arginfo
[i
].op
,
3184 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3186 = POINTER_TYPE_P (TREE_TYPE (op
))
3187 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3188 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3189 ? sizetype
: TREE_TYPE (op
);
3191 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3193 tree tcst
= wide_int_to_tree (type
, cst
);
3194 tree phi_arg
= copy_ssa_name (op
);
3196 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3197 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3198 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3199 set_vinfo_for_stmt (new_stmt
,
3200 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3201 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3203 arginfo
[i
].op
= phi_res
;
3204 vargs
.safe_push (phi_res
);
3209 = POINTER_TYPE_P (TREE_TYPE (op
))
3210 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3211 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3212 ? sizetype
: TREE_TYPE (op
);
3214 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3216 tree tcst
= wide_int_to_tree (type
, cst
);
3217 new_temp
= make_ssa_name (TREE_TYPE (op
));
3218 new_stmt
= gimple_build_assign (new_temp
, code
,
3219 arginfo
[i
].op
, tcst
);
3220 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3221 vargs
.safe_push (new_temp
);
3224 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3225 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3226 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3227 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3228 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3229 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3235 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3238 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3240 new_temp
= create_tmp_var (ratype
);
3241 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3242 == TYPE_VECTOR_SUBPARTS (rtype
))
3243 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3245 new_temp
= make_ssa_name (rtype
, new_stmt
);
3246 gimple_call_set_lhs (new_stmt
, new_temp
);
3248 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3252 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3255 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3256 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3257 gcc_assert ((k
& (k
- 1)) == 0);
3258 for (l
= 0; l
< k
; l
++)
3263 t
= build_fold_addr_expr (new_temp
);
3264 t
= build2 (MEM_REF
, vectype
, t
,
3265 build_int_cst (TREE_TYPE (t
),
3266 l
* prec
/ BITS_PER_UNIT
));
3269 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3270 size_int (prec
), bitsize_int (l
* prec
));
3272 = gimple_build_assign (make_ssa_name (vectype
), t
);
3273 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3274 if (j
== 0 && l
== 0)
3275 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3277 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3279 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3284 tree clobber
= build_constructor (ratype
, NULL
);
3285 TREE_THIS_VOLATILE (clobber
) = 1;
3286 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3287 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3291 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3293 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3294 / TYPE_VECTOR_SUBPARTS (rtype
));
3295 gcc_assert ((k
& (k
- 1)) == 0);
3296 if ((j
& (k
- 1)) == 0)
3297 vec_alloc (ret_ctor_elts
, k
);
3300 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3301 for (m
= 0; m
< o
; m
++)
3303 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3304 size_int (m
), NULL_TREE
, NULL_TREE
);
3306 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3307 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3308 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3309 gimple_assign_lhs (new_stmt
));
3311 tree clobber
= build_constructor (ratype
, NULL
);
3312 TREE_THIS_VOLATILE (clobber
) = 1;
3313 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3314 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3318 if ((j
& (k
- 1)) != k
- 1)
3320 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3322 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3323 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3325 if ((unsigned) j
== k
- 1)
3326 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3328 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3330 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3335 tree t
= build_fold_addr_expr (new_temp
);
3336 t
= build2 (MEM_REF
, vectype
, t
,
3337 build_int_cst (TREE_TYPE (t
), 0));
3339 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3340 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3341 tree clobber
= build_constructor (ratype
, NULL
);
3342 TREE_THIS_VOLATILE (clobber
) = 1;
3343 vect_finish_stmt_generation (stmt
,
3344 gimple_build_assign (new_temp
,
3350 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3352 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3354 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3359 /* The call in STMT might prevent it from being removed in dce.
3360 We however cannot remove it here, due to the way the ssa name
3361 it defines is mapped to the new definition. So just replace
3362 rhs of the statement with something harmless. */
3369 type
= TREE_TYPE (scalar_dest
);
3370 if (is_pattern_stmt_p (stmt_info
))
3371 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3373 lhs
= gimple_call_lhs (stmt
);
3374 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3377 new_stmt
= gimple_build_nop ();
3378 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3379 set_vinfo_for_stmt (stmt
, NULL
);
3380 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3381 gsi_replace (gsi
, new_stmt
, true);
3382 unlink_stmt_vdef (stmt
);
3388 /* Function vect_gen_widened_results_half
3390 Create a vector stmt whose code, type, number of arguments, and result
3391 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3392 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3393 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3394 needs to be created (DECL is a function-decl of a target-builtin).
3395 STMT is the original scalar stmt that we are vectorizing. */
3398 vect_gen_widened_results_half (enum tree_code code
,
3400 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3401 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3407 /* Generate half of the widened result: */
3408 if (code
== CALL_EXPR
)
3410 /* Target specific support */
3411 if (op_type
== binary_op
)
3412 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3414 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3415 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3416 gimple_call_set_lhs (new_stmt
, new_temp
);
3420 /* Generic support */
3421 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3422 if (op_type
!= binary_op
)
3424 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3425 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3426 gimple_assign_set_lhs (new_stmt
, new_temp
);
3428 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3434 /* Get vectorized definitions for loop-based vectorization. For the first
3435 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3436 scalar operand), and for the rest we get a copy with
3437 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3438 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3439 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): extraction dropped lines here (numbering jumps 3443 -> 3447);
   the return type and the local `tree vec_oprnd;` declaration are missing.  */
3442 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3443 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3447 /* Get first vector operand. */
3448 /* All the vector operands except the very first one (that is scalar oprnd)
/* If *OPRND is still the scalar operand (first call), create its initial
   vector def; on recursive calls *OPRND already holds a vector def and we
   take the next copy from it.  */
3450 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3451 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3453 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3455 vec_oprnds
->quick_push (vec_oprnd
);
3457 /* Get second vector operand. */
3458 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3459 vec_oprnds
->quick_push (vec_oprnd
);
/* NOTE(review): upstream stores the last def back through *OPRND before
   recursing (elided line) so the next level continues the def chain.  */
3463 /* For conversion in multiple steps, continue to get operands
/* Recurse once per remaining conversion step; each level doubles the number
   of collected vector operands.  */
3466 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3470 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3471 For multi-step conversions store the resulting vectors and call the function
/* Demotion (narrowing) combines pairs of input vectors into one output
   vector using CODE (a VEC_PACK_* code).  MULTI_STEP_CVT levels of recursion
   narrow through the intermediate types popped from VEC_DSTS.
   NOTE(review): the extraction dropped lines throughout (numbering jumps);
   locals such as `unsigned int i;` and `gimple *new_stmt;` and several
   braces/else lines are missing.  Text kept verbatim.  */
3475 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3476 int multi_step_cvt
, gimple
*stmt
,
3478 gimple_stmt_iterator
*gsi
,
3479 slp_tree slp_node
, enum tree_code code
,
3480 stmt_vec_info
*prev_stmt_info
)
3483 tree vop0
, vop1
, new_tmp
, vec_dest
;
3485 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Destination var for this narrowing level.  */
3487 vec_dest
= vec_dsts
.pop ();
/* Pack adjacent pairs of input vectors into single narrower vectors.  */
3489 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3491 /* Create demotion operation. */
3492 vop0
= (*vec_oprnds
)[i
];
3493 vop1
= (*vec_oprnds
)[i
+ 1];
3494 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3495 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3496 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3497 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* While more steps remain, overwrite the input slot so the halved set of
   results feeds the next recursion level.  */
3500 /* Store the resulting vector for next recursive call. */
3501 (*vec_oprnds
)[i
/2] = new_tmp
;
3504 /* This is the last step of the conversion sequence. Store the
3505 vectors in SLP_NODE or in vector info of the scalar statement
3506 (or in STMT_VINFO_RELATED_STMT chain). */
3508 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Loop-based path: chain the new stmt off STMT_VINFO_VEC_STMT /
   STMT_VINFO_RELATED_STMT via *PREV_STMT_INFO.  */
3511 if (!*prev_stmt_info
)
3512 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3514 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3516 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3521 /* For multi-step demotion operations we first generate demotion operations
3522 from the source type to the intermediate types, and then combine the
3523 results (stored in VEC_OPRNDS) in demotion operation to the destination
3527 /* At each level of recursion we have half of the operands we had at the
/* Shrink the operand vector to the surviving half, then recurse one level
   deeper; intermediate levels always use VEC_PACK_TRUNC_EXPR.  */
3529 vec_oprnds
->truncate ((i
+1)/2);
3530 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3531 stmt
, vec_dsts
, gsi
, slp_node
,
3532 VEC_PACK_TRUNC_EXPR
,
/* Restore the popped destination for the caller's reuse.  */
3536 vec_dsts
.quick_push (vec_dest
);
3540 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3541 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3542 the resulting vectors and call the function recursively. */
/* Promotion (widening): each input vector yields TWO output vectors, built
   with CODE1/DECL1 for the low half and CODE2/DECL2 for the high half.
   Results replace *VEC_OPRNDS0 so the caller can iterate further steps.
   NOTE(review): extraction dropped lines (e.g. `int i;` and braces); text
   kept verbatim.  */
3545 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3546 vec
<tree
> *vec_oprnds1
,
3547 gimple
*stmt
, tree vec_dest
,
3548 gimple_stmt_iterator
*gsi
,
3549 enum tree_code code1
,
3550 enum tree_code code2
, tree decl1
,
3551 tree decl2
, int op_type
)
3554 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3555 gimple
*new_stmt1
, *new_stmt2
;
3556 vec
<tree
> vec_tmp
= vNULL
;
/* Output has twice as many vectors as the input.  */
3558 vec_tmp
.create (vec_oprnds0
->length () * 2);
3559 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
/* For binary widening ops fetch the matching second operand.  */
3561 if (op_type
== binary_op
)
3562 vop1
= (*vec_oprnds1
)[i
];
3566 /* Generate the two halves of promotion operation. */
3567 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3568 op_type
, vec_dest
, gsi
, stmt
);
3569 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3570 op_type
, vec_dest
, gsi
, stmt
);
/* The halves may be calls (target builtins) or assigns; pick the
   matching lhs accessor.  */
3571 if (is_gimple_call (new_stmt1
))
3573 new_tmp1
= gimple_call_lhs (new_stmt1
);
3574 new_tmp2
= gimple_call_lhs (new_stmt2
);
3578 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3579 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3582 /* Store the results for the next step. */
3583 vec_tmp
.quick_push (new_tmp1
);
3584 vec_tmp
.quick_push (new_tmp2
);
/* Hand the doubled result set back through *VEC_OPRNDS0.  */
3587 vec_oprnds0
->release ();
3588 *vec_oprnds0
= vec_tmp
;
3592 /* Check if STMT performs a conversion operation, that can be vectorized.
3593 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3594 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3595 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction dropped many lines from this function (the
   embedded original numbering jumps repeatedly); `return false;` paths,
   `else` keywords, braces and some declarations are missing.  Text below
   is kept verbatim.  */
3598 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3599 gimple
**vec_stmt
, slp_tree slp_node
)
3603 tree op0
, op1
= NULL_TREE
;
3604 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3605 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3606 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3607 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3608 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3609 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3612 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3613 gimple
*new_stmt
= NULL
;
3614 stmt_vec_info prev_stmt_info
;
3617 tree vectype_out
, vectype_in
;
3619 tree lhs_type
, rhs_type
;
/* MODIFIER classifies the conversion by relative vector widths.  */
3620 enum { NARROW
, NONE
, WIDEN
} modifier
;
3621 vec
<tree
> vec_oprnds0
= vNULL
;
3622 vec
<tree
> vec_oprnds1
= vNULL
;
3624 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3625 vec_info
*vinfo
= stmt_info
->vinfo
;
3626 int multi_step_cvt
= 0;
3627 vec
<tree
> vec_dsts
= vNULL
;
3628 vec
<tree
> interm_types
= vNULL
;
3629 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3631 machine_mode rhs_mode
;
3632 unsigned short fltsz
;
/* ---- Analysis phase: reject stmts this function cannot handle.  ---- */
3634 /* Is STMT a vectorizable conversion? */
3636 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3639 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3643 if (!is_gimple_assign (stmt
))
3646 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only conversions and widening mult/shift codes are handled here.  */
3649 code
= gimple_assign_rhs_code (stmt
);
3650 if (!CONVERT_EXPR_CODE_P (code
)
3651 && code
!= FIX_TRUNC_EXPR
3652 && code
!= FLOAT_EXPR
3653 && code
!= WIDEN_MULT_EXPR
3654 && code
!= WIDEN_LSHIFT_EXPR
)
3657 op_type
= TREE_CODE_LENGTH (code
);
3659 /* Check types of lhs and rhs. */
3660 scalar_dest
= gimple_assign_lhs (stmt
);
3661 lhs_type
= TREE_TYPE (scalar_dest
);
3662 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3664 op0
= gimple_assign_rhs1 (stmt
);
3665 rhs_type
= TREE_TYPE (op0
);
/* Plain conversions must stay within integral<->integral or
   float<->float; FIX_TRUNC/FLOAT are the mixed-domain codes.  */
3667 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3668 && !((INTEGRAL_TYPE_P (lhs_type
)
3669 && INTEGRAL_TYPE_P (rhs_type
))
3670 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3671 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
/* Bit-precision (sub-mode precision) integer conversions unsupported.  */
3674 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3675 && ((INTEGRAL_TYPE_P (lhs_type
)
3676 && (TYPE_PRECISION (lhs_type
)
3677 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3678 || (INTEGRAL_TYPE_P (rhs_type
)
3679 && (TYPE_PRECISION (rhs_type
)
3680 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3682 if (dump_enabled_p ())
3683 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3684 "type conversion to/from bit-precision unsupported."
3689 /* Check the operands of the operation. */
3690 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3694 "use not simple.\n");
/* Binary widening ops have a second operand to validate.  */
3697 if (op_type
== binary_op
)
3701 op1
= gimple_assign_rhs2 (stmt
);
3702 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3703 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3705 if (CONSTANT_CLASS_P (op0
))
3706 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
3708 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3712 if (dump_enabled_p ())
3713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3714 "use not simple.\n");
3719 /* If op0 is an external or constant defs use a vector type of
3720 the same size as the output vector type. */
3722 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3724 gcc_assert (vectype_in
);
3727 if (dump_enabled_p ())
3729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3730 "no vectype for scalar type ");
3731 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3732 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Boolean vectors can only convert to boolean vectors.  */
3738 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3739 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3741 if (dump_enabled_p ())
3743 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3744 "can't convert between boolean and non "
3746 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3747 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify as WIDEN / NONE / NARROW by comparing subpart counts
   (elided assignments to `modifier` sit between these tests).  */
3753 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3754 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3755 if (nunits_in
< nunits_out
)
3757 else if (nunits_out
== nunits_in
)
3762 /* Multiple types in SLP are handled by creating the appropriate number of
3763 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3767 else if (modifier
== NARROW
)
3768 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3770 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3772 /* Sanity check: make sure that at least one copy of the vectorized stmt
3773 needs to be generated. */
3774 gcc_assert (ncopies
>= 1);
/* ---- Can the target do this conversion (directly or multi-step)?  ---- */
3776 /* Supportable by target? */
3780 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3782 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3787 if (dump_enabled_p ())
3788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3789 "conversion not supported by target.\n");
/* WIDEN case: try a direct widening operation first.  */
3793 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3794 &code1
, &code2
, &multi_step_cvt
,
3797 /* Binary widening operation can only be supported directly by the
3799 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
))
;
/* int->float widening where float is wider than int: search for an
   intermediate integer mode to widen through before converting.  */
3803 if (code
!= FLOAT_EXPR
3804 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3805 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3808 rhs_mode
= TYPE_MODE (rhs_type
);
3809 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3810 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3811 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3812 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3815 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3816 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3817 if (cvt_type
== NULL_TREE
)
3820 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3822 if (!supportable_convert_operation (code
, vectype_out
,
3823 cvt_type
, &decl1
, &codecvt1
))
3826 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3827 cvt_type
, &codecvt1
,
3828 &codecvt2
, &multi_step_cvt
,
3832 gcc_assert (multi_step_cvt
== 0);
3834 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3835 vectype_in
, &code1
, &code2
,
3836 &multi_step_cvt
, &interm_types
))
3840 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3843 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3844 codecvt2
= ERROR_MARK
;
3848 interm_types
.safe_push (cvt_type
);
3849 cvt_type
= NULL_TREE
;
/* NARROW case: try direct narrowing, else convert to an intermediate
   integer type first and narrow from there.  */
3854 gcc_assert (op_type
== unary_op
);
3855 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3856 &code1
, &multi_step_cvt
,
3860 if (code
!= FIX_TRUNC_EXPR
3861 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3862 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3865 rhs_mode
= TYPE_MODE (rhs_type
);
3867 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3868 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3869 if (cvt_type
== NULL_TREE
)
3871 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3874 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3875 &code1
, &multi_step_cvt
,
/* ---- Analysis only: record the stmt kind and the cost model.  ---- */
3884 if (!vec_stmt
) /* transformation not required. */
3886 if (dump_enabled_p ())
3887 dump_printf_loc (MSG_NOTE
, vect_location
,
3888 "=== vectorizable_conversion ===\n");
3889 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3891 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3892 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3894 else if (modifier
== NARROW
)
3896 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3897 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3901 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3902 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3904 interm_types
.release ();
/* ---- Transformation phase.  ---- */
3909 if (dump_enabled_p ())
3910 dump_printf_loc (MSG_NOTE
, vect_location
,
3911 "transform conversion. ncopies = %d.\n", ncopies
);
/* Normalize constant operands of binary widening ops to the type of the
   non-constant operand.  */
3913 if (op_type
== binary_op
)
3915 if (CONSTANT_CLASS_P (op0
))
3916 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3917 else if (CONSTANT_CLASS_P (op1
))
3918 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3921 /* In case of multi-step conversion, we first generate conversion operations
3922 to the intermediate types, and then from that types to the final one.
3923 We create vector destinations for the intermediate type (TYPES) received
3924 from supportable_*_operation, and store them in the correct order
3925 for future use in vect_create_vectorized_*_stmts (). */
3926 vec_dsts
.create (multi_step_cvt
+ 1);
3927 vec_dest
= vect_create_destination_var (scalar_dest
,
3928 (cvt_type
&& modifier
== WIDEN
)
3929 ? cvt_type
: vectype_out
);
3930 vec_dsts
.quick_push (vec_dest
);
/* Push intermediate-type destinations, innermost last.  */
3934 for (i
= interm_types
.length () - 1;
3935 interm_types
.iterate (i
, &intermediate_type
); i
--)
3937 vec_dest
= vect_create_destination_var (scalar_dest
,
3939 vec_dsts
.quick_push (vec_dest
);
3944 vec_dest
= vect_create_destination_var (scalar_dest
,
3946 ? vectype_out
: cvt_type
);
/* Pre-size operand vectors by modifier (elided branches around here).  */
3950 if (modifier
== WIDEN
)
3952 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3953 if (op_type
== binary_op
)
3954 vec_oprnds1
.create (1);
3956 else if (modifier
== NARROW
)
3957 vec_oprnds0
.create (
3958 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3960 else if (code
== WIDEN_LSHIFT_EXPR
)
3961 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3964 prev_stmt_info
= NULL
;
/* --- MODIFIER == NONE: one straight conversion stmt per copy.  --- */
3968 for (j
= 0; j
< ncopies
; j
++)
3971 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3974 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3976 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3978 /* Arguments are ready, create the new vector stmt. */
3979 if (code1
== CALL_EXPR
)
3981 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3982 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3983 gimple_call_set_lhs (new_stmt
, new_temp
);
3987 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3988 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3989 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3990 gimple_assign_set_lhs (new_stmt
, new_temp
);
3993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3995 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3998 if (!prev_stmt_info
)
3999 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4001 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4002 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* --- MODIFIER == WIDEN: promote via vect_create_vectorized_promotion_stmts,
   then apply the final codecvt1 conversion if one was chosen.  --- */
4009 /* In case the vectorization factor (VF) is bigger than the number
4010 of elements that we can fit in a vectype (nunits), we have to
4011 generate more than one vector stmt - i.e - we need to "unroll"
4012 the vector stmt by a factor VF/nunits. */
4013 for (j
= 0; j
< ncopies
; j
++)
4020 if (code
== WIDEN_LSHIFT_EXPR
)
4025 /* Store vec_oprnd1 for every vector stmt to be created
4026 for SLP_NODE. We check during the analysis that all
4027 the shift arguments are the same. */
4028 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4029 vec_oprnds1
.quick_push (vec_oprnd1
);
4031 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4035 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4036 &vec_oprnds1
, slp_node
, -1);
4040 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4041 vec_oprnds0
.quick_push (vec_oprnd0
);
4042 if (op_type
== binary_op
)
4044 if (code
== WIDEN_LSHIFT_EXPR
)
4047 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4048 vec_oprnds1
.quick_push (vec_oprnd1
);
/* Subsequent copies (j > 0): reuse the def chain.  */
4054 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4055 vec_oprnds0
.truncate (0);
4056 vec_oprnds0
.quick_push (vec_oprnd0
);
4057 if (op_type
== binary_op
)
4059 if (code
== WIDEN_LSHIFT_EXPR
)
4062 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4064 vec_oprnds1
.truncate (0);
4065 vec_oprnds1
.quick_push (vec_oprnd1
);
4069 /* Arguments are ready. Create the new vector stmts. */
4070 for (i
= multi_step_cvt
; i
>= 0; i
--)
4072 tree this_dest
= vec_dsts
[i
];
4073 enum tree_code c1
= code1
, c2
= code2
;
4074 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4079 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4081 stmt
, this_dest
, gsi
,
4082 c1
, c2
, decl1
, decl2
,
/* Apply the trailing int->float conversion (codecvt1) when present;
   otherwise the promotion result itself is the final stmt.  */
4086 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4090 if (codecvt1
== CALL_EXPR
)
4092 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4093 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4094 gimple_call_set_lhs (new_stmt
, new_temp
);
4098 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4099 new_temp
= make_ssa_name (vec_dest
);
4100 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4104 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4107 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4110 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4113 if (!prev_stmt_info
)
4114 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4116 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4117 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4122 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* --- MODIFIER == NARROW: optional codecvt1 pre-conversion, then
   demote via vect_create_vectorized_demotion_stmts.  --- */
4126 /* In case the vectorization factor (VF) is bigger than the number
4127 of elements that we can fit in a vectype (nunits), we have to
4128 generate more than one vector stmt - i.e - we need to "unroll"
4129 the vector stmt by a factor VF/nunits. */
4130 for (j
= 0; j
< ncopies
; j
++)
4134 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4138 vec_oprnds0
.truncate (0);
4139 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4140 vect_pow2 (multi_step_cvt
) - 1);
4143 /* Arguments are ready. Create the new vector stmts. */
4145 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4147 if (codecvt1
== CALL_EXPR
)
4149 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4150 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4151 gimple_call_set_lhs (new_stmt
, new_temp
);
4155 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4156 new_temp
= make_ssa_name (vec_dest
);
4157 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4161 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4162 vec_oprnds0
[i
] = new_temp
;
4165 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4166 stmt
, vec_dsts
, gsi
,
4171 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Release all scratch vectors before returning.  */
4175 vec_oprnds0
.release ();
4176 vec_oprnds1
.release ();
4177 vec_dsts
.release ();
4178 interm_types
.release ();
4184 /* Function vectorizable_assignment.
4186 Check if STMT performs an assignment (copy) that can be vectorized.
4187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4188 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): extraction dropped lines (original numbering jumps);
   several locals, `return false;` paths and braces are missing.  Text
   kept verbatim.  */
4192 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4193 gimple
**vec_stmt
, slp_tree slp_node
)
4198 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4199 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4202 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4205 vec
<tree
> vec_oprnds
= vNULL
;
4207 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4208 vec_info
*vinfo
= stmt_info
->vinfo
;
4209 gimple
*new_stmt
= NULL
;
4210 stmt_vec_info prev_stmt_info
= NULL
;
4211 enum tree_code code
;
/* ---- Analysis: is this a plain copy (or no-op conversion)?  ---- */
4214 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4217 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4221 /* Is vectorizable assignment? */
4222 if (!is_gimple_assign (stmt
))
4225 scalar_dest
= gimple_assign_lhs (stmt
);
4226 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Accept single-operand copies, PAREN_EXPR and conversions.  */
4229 code
= gimple_assign_rhs_code (stmt
);
4230 if (gimple_assign_single_p (stmt
)
4231 || code
== PAREN_EXPR
4232 || CONVERT_EXPR_CODE_P (code
))
4233 op
= gimple_assign_rhs1 (stmt
);
/* Look through VIEW_CONVERT_EXPR to the underlying operand.  */
4237 if (code
== VIEW_CONVERT_EXPR
)
4238 op
= TREE_OPERAND (op
, 0);
4240 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4241 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4243 /* Multiple types in SLP are handled by creating the appropriate number of
4244 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4249 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4251 gcc_assert (ncopies
>= 1);
4253 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4255 if (dump_enabled_p ())
4256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4257 "use not simple.\n");
4261 /* We can handle NOP_EXPR conversions that do not change the number
4262 of elements or the vector size. */
4263 if ((CONVERT_EXPR_CODE_P (code
)
4264 || code
== VIEW_CONVERT_EXPR
)
4266 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4267 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4268 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
/* Reject precision-changing conversions unless the bit pattern is
   provably unchanged (widening of an unsigned source) or both sides
   are boolean vectors of the same vectype.  */
4271 /* We do not handle bit-precision changes. */
4272 if ((CONVERT_EXPR_CODE_P (code
)
4273 || code
== VIEW_CONVERT_EXPR
)
4274 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4275 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4276 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4277 || ((TYPE_PRECISION (TREE_TYPE (op
))
4278 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4279 /* But a conversion that does not change the bit-pattern is ok. */
4280 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4281 > TYPE_PRECISION (TREE_TYPE (op
)))
4282 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4283 /* Conversion between boolean types of different sizes is
4284 a simple assignment in case their vectypes are same
4286 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4287 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4289 if (dump_enabled_p ())
4290 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4291 "type conversion to/from bit-precision "
/* ---- Analysis only: record stmt kind and cost.  ---- */
4296 if (!vec_stmt
) /* transformation not required. */
4298 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4299 if (dump_enabled_p ())
4300 dump_printf_loc (MSG_NOTE
, vect_location
,
4301 "=== vectorizable_assignment ===\n");
4302 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* ---- Transformation: emit one VIEW_CONVERT/copy per vector def.  ---- */
4307 if (dump_enabled_p ())
4308 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4311 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4314 for (j
= 0; j
< ncopies
; j
++)
4318 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4320 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4322 /* Arguments are ready. create the new vector stmt. */
4323 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4325 if (CONVERT_EXPR_CODE_P (code
)
4326 || code
== VIEW_CONVERT_EXPR
)
4327 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4328 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4329 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4330 gimple_assign_set_lhs (new_stmt
, new_temp
);
4331 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4333 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain copies through STMT_VINFO_RELATED_STMT (loop-based path).  */
4340 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4342 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4344 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4347 vec_oprnds
.release ();
4352 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4353 either as shift by a scalar or by a vector. */
/* NOTE(review): extraction dropped lines (e.g. the optab/icode/vectype
   declarations, the early `return false` checks and the final `return
   true;`).  Text kept verbatim.  */
4356 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4359 machine_mode vec_mode
;
4364 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Prefer the vector-shifted-by-scalar optab; fall back to the
   vector-shifted-by-vector form when it is unavailable.  */
4368 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4370 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4372 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4374 || (optab_handler (optab
, TYPE_MODE (vectype
))
4375 == CODE_FOR_nothing
))
/* Final check: the chosen optab must have an insn for the vector mode.  */
4379 vec_mode
= TYPE_MODE (vectype
);
4380 icode
= (int) optab_handler (optab
, vec_mode
);
4381 if (icode
== CODE_FOR_nothing
)
4388 /* Function vectorizable_shift.
4390 Check if STMT performs a shift operation that can be vectorized.
4391 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4392 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4393 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4396 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4397 gimple
**vec_stmt
, slp_tree slp_node
)
4401 tree op0
, op1
= NULL
;
4402 tree vec_oprnd1
= NULL_TREE
;
4403 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4405 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4406 enum tree_code code
;
4407 machine_mode vec_mode
;
4411 machine_mode optab_op2_mode
;
4413 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4414 gimple
*new_stmt
= NULL
;
4415 stmt_vec_info prev_stmt_info
;
4422 vec
<tree
> vec_oprnds0
= vNULL
;
4423 vec
<tree
> vec_oprnds1
= vNULL
;
4426 bool scalar_shift_arg
= true;
4427 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4428 vec_info
*vinfo
= stmt_info
->vinfo
;
4431 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4434 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4438 /* Is STMT a vectorizable binary/unary operation? */
4439 if (!is_gimple_assign (stmt
))
4442 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4445 code
= gimple_assign_rhs_code (stmt
);
4447 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4448 || code
== RROTATE_EXPR
))
4451 scalar_dest
= gimple_assign_lhs (stmt
);
4452 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4453 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4454 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4456 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4458 "bit-precision shifts not supported.\n");
4462 op0
= gimple_assign_rhs1 (stmt
);
4463 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4467 "use not simple.\n");
4470 /* If op0 is an external or constant def use a vector type with
4471 the same size as the output vector type. */
4473 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4475 gcc_assert (vectype
);
4478 if (dump_enabled_p ())
4479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4480 "no vectype for scalar type\n");
4484 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4485 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4486 if (nunits_out
!= nunits_in
)
4489 op1
= gimple_assign_rhs2 (stmt
);
4490 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4492 if (dump_enabled_p ())
4493 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4494 "use not simple.\n");
4499 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4503 /* Multiple types in SLP are handled by creating the appropriate number of
4504 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4509 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4511 gcc_assert (ncopies
>= 1);
4513 /* Determine whether the shift amount is a vector, or scalar. If the
4514 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4516 if ((dt
[1] == vect_internal_def
4517 || dt
[1] == vect_induction_def
)
4519 scalar_shift_arg
= false;
4520 else if (dt
[1] == vect_constant_def
4521 || dt
[1] == vect_external_def
4522 || dt
[1] == vect_internal_def
)
4524 /* In SLP, need to check whether the shift count is the same,
4525 in loops if it is a constant or invariant, it is always
4529 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4532 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4533 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4534 scalar_shift_arg
= false;
4537 /* If the shift amount is computed by a pattern stmt we cannot
4538 use the scalar amount directly thus give up and use a vector
4540 if (dt
[1] == vect_internal_def
)
4542 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4543 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4544 scalar_shift_arg
= false;
4549 if (dump_enabled_p ())
4550 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4551 "operand mode requires invariant argument.\n");
4555 /* Vector shifted by vector. */
4556 if (!scalar_shift_arg
)
4558 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_NOTE
, vect_location
,
4561 "vector/vector shift/rotate found.\n");
4564 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4565 if (op1_vectype
== NULL_TREE
4566 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4570 "unusable type for last operand in"
4571 " vector/vector shift/rotate.\n");
4575 /* See if the machine has a vector shifted by scalar insn and if not
4576 then see if it has a vector shifted by vector insn. */
4579 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4581 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4583 if (dump_enabled_p ())
4584 dump_printf_loc (MSG_NOTE
, vect_location
,
4585 "vector/scalar shift/rotate found.\n");
4589 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4591 && (optab_handler (optab
, TYPE_MODE (vectype
))
4592 != CODE_FOR_nothing
))
4594 scalar_shift_arg
= false;
4596 if (dump_enabled_p ())
4597 dump_printf_loc (MSG_NOTE
, vect_location
,
4598 "vector/vector shift/rotate found.\n");
4600 /* Unlike the other binary operators, shifts/rotates have
4601 the rhs being int, instead of the same type as the lhs,
4602 so make sure the scalar is the right type if we are
4603 dealing with vectors of long long/long/short/char. */
4604 if (dt
[1] == vect_constant_def
)
4605 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4606 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4610 && TYPE_MODE (TREE_TYPE (vectype
))
4611 != TYPE_MODE (TREE_TYPE (op1
)))
4613 if (dump_enabled_p ())
4614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4615 "unusable type for last operand in"
4616 " vector/vector shift/rotate.\n");
4619 if (vec_stmt
&& !slp_node
)
4621 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4622 op1
= vect_init_vector (stmt
, op1
,
4623 TREE_TYPE (vectype
), NULL
);
4630 /* Supportable by target? */
4633 if (dump_enabled_p ())
4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4638 vec_mode
= TYPE_MODE (vectype
);
4639 icode
= (int) optab_handler (optab
, vec_mode
);
4640 if (icode
== CODE_FOR_nothing
)
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4644 "op not supported by target.\n");
4645 /* Check only during analysis. */
4646 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4647 || (vf
< vect_min_worthwhile_factor (code
)
4650 if (dump_enabled_p ())
4651 dump_printf_loc (MSG_NOTE
, vect_location
,
4652 "proceeding using word mode.\n");
4655 /* Worthwhile without SIMD support? Check only during analysis. */
4656 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4657 && vf
< vect_min_worthwhile_factor (code
)
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4662 "not worthwhile without SIMD support.\n");
4666 if (!vec_stmt
) /* transformation not required. */
4668 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_NOTE
, vect_location
,
4671 "=== vectorizable_shift ===\n");
4672 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE
, vect_location
,
4680 "transform binary/unary operation.\n");
4683 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4685 prev_stmt_info
= NULL
;
4686 for (j
= 0; j
< ncopies
; j
++)
4691 if (scalar_shift_arg
)
4693 /* Vector shl and shr insn patterns can be defined with scalar
4694 operand 2 (shift operand). In this case, use constant or loop
4695 invariant op1 directly, without extending it to vector mode
4697 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4698 if (!VECTOR_MODE_P (optab_op2_mode
))
4700 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_NOTE
, vect_location
,
4702 "operand 1 using scalar mode.\n");
4704 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4705 vec_oprnds1
.quick_push (vec_oprnd1
);
4708 /* Store vec_oprnd1 for every vector stmt to be created
4709 for SLP_NODE. We check during the analysis that all
4710 the shift arguments are the same.
4711 TODO: Allow different constants for different vector
4712 stmts generated for an SLP instance. */
4713 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4714 vec_oprnds1
.quick_push (vec_oprnd1
);
4719 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4720 (a special case for certain kind of vector shifts); otherwise,
4721 operand 1 should be of a vector type (the usual case). */
4723 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4726 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4730 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4732 /* Arguments are ready. Create the new vector stmt. */
4733 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4735 vop1
= vec_oprnds1
[i
];
4736 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4737 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4738 gimple_assign_set_lhs (new_stmt
, new_temp
);
4739 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4741 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4748 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4750 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4751 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4754 vec_oprnds0
.release ();
4755 vec_oprnds1
.release ();
4761 /* Function vectorizable_operation.
4763 Check if STMT performs a binary, unary or ternary operation that can
4765 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4766 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4767 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4770 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4771 gimple
**vec_stmt
, slp_tree slp_node
)
4775 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4776 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4778 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4779 enum tree_code code
;
4780 machine_mode vec_mode
;
4784 bool target_support_p
;
4786 enum vect_def_type dt
[3]
4787 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4788 gimple
*new_stmt
= NULL
;
4789 stmt_vec_info prev_stmt_info
;
4795 vec
<tree
> vec_oprnds0
= vNULL
;
4796 vec
<tree
> vec_oprnds1
= vNULL
;
4797 vec
<tree
> vec_oprnds2
= vNULL
;
4798 tree vop0
, vop1
, vop2
;
4799 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4800 vec_info
*vinfo
= stmt_info
->vinfo
;
4803 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4806 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4810 /* Is STMT a vectorizable binary/unary operation? */
4811 if (!is_gimple_assign (stmt
))
4814 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4817 code
= gimple_assign_rhs_code (stmt
);
4819 /* For pointer addition, we should use the normal plus for
4820 the vector addition. */
4821 if (code
== POINTER_PLUS_EXPR
)
4824 /* Support only unary or binary operations. */
4825 op_type
= TREE_CODE_LENGTH (code
);
4826 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4828 if (dump_enabled_p ())
4829 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4830 "num. args = %d (not unary/binary/ternary op).\n",
4835 scalar_dest
= gimple_assign_lhs (stmt
);
4836 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4838 /* Most operations cannot handle bit-precision types without extra
4840 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4841 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4842 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4843 /* Exception are bitwise binary operations. */
4844 && code
!= BIT_IOR_EXPR
4845 && code
!= BIT_XOR_EXPR
4846 && code
!= BIT_AND_EXPR
)
4848 if (dump_enabled_p ())
4849 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4850 "bit-precision arithmetic not supported.\n");
4854 op0
= gimple_assign_rhs1 (stmt
);
4855 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4859 "use not simple.\n");
4862 /* If op0 is an external or constant def use a vector type with
4863 the same size as the output vector type. */
4866 /* For boolean type we cannot determine vectype by
4867 invariant value (don't know whether it is a vector
4868 of booleans or vector of integers). We use output
4869 vectype because operations on boolean don't change
4871 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
4873 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4877 "not supported operation on bool value.\n");
4880 vectype
= vectype_out
;
4883 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4886 gcc_assert (vectype
);
4889 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4892 "no vectype for scalar type ");
4893 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4895 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4901 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4902 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4903 if (nunits_out
!= nunits_in
)
4906 if (op_type
== binary_op
|| op_type
== ternary_op
)
4908 op1
= gimple_assign_rhs2 (stmt
);
4909 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
4911 if (dump_enabled_p ())
4912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4913 "use not simple.\n");
4917 if (op_type
== ternary_op
)
4919 op2
= gimple_assign_rhs3 (stmt
);
4920 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
4922 if (dump_enabled_p ())
4923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4924 "use not simple.\n");
4930 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4934 /* Multiple types in SLP are handled by creating the appropriate number of
4935 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4940 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4942 gcc_assert (ncopies
>= 1);
4944 /* Shifts are handled in vectorizable_shift (). */
4945 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4946 || code
== RROTATE_EXPR
)
4949 /* Supportable by target? */
4951 vec_mode
= TYPE_MODE (vectype
);
4952 if (code
== MULT_HIGHPART_EXPR
)
4953 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
4956 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4959 if (dump_enabled_p ())
4960 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4964 target_support_p
= (optab_handler (optab
, vec_mode
)
4965 != CODE_FOR_nothing
);
4968 if (!target_support_p
)
4970 if (dump_enabled_p ())
4971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4972 "op not supported by target.\n");
4973 /* Check only during analysis. */
4974 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4975 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4977 if (dump_enabled_p ())
4978 dump_printf_loc (MSG_NOTE
, vect_location
,
4979 "proceeding using word mode.\n");
4982 /* Worthwhile without SIMD support? Check only during analysis. */
4983 if (!VECTOR_MODE_P (vec_mode
)
4985 && vf
< vect_min_worthwhile_factor (code
))
4987 if (dump_enabled_p ())
4988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4989 "not worthwhile without SIMD support.\n");
4993 if (!vec_stmt
) /* transformation not required. */
4995 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_NOTE
, vect_location
,
4998 "=== vectorizable_operation ===\n");
4999 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5005 if (dump_enabled_p ())
5006 dump_printf_loc (MSG_NOTE
, vect_location
,
5007 "transform binary/unary operation.\n");
5010 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5012 /* In case the vectorization factor (VF) is bigger than the number
5013 of elements that we can fit in a vectype (nunits), we have to generate
5014 more than one vector stmt - i.e - we need to "unroll" the
5015 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5016 from one copy of the vector stmt to the next, in the field
5017 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5018 stages to find the correct vector defs to be used when vectorizing
5019 stmts that use the defs of the current stmt. The example below
5020 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5021 we need to create 4 vectorized stmts):
5023 before vectorization:
5024 RELATED_STMT VEC_STMT
5028 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5030 RELATED_STMT VEC_STMT
5031 VS1_0: vx0 = memref0 VS1_1 -
5032 VS1_1: vx1 = memref1 VS1_2 -
5033 VS1_2: vx2 = memref2 VS1_3 -
5034 VS1_3: vx3 = memref3 - -
5035 S1: x = load - VS1_0
5038 step2: vectorize stmt S2 (done here):
5039 To vectorize stmt S2 we first need to find the relevant vector
5040 def for the first operand 'x'. This is, as usual, obtained from
5041 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5042 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5043 relevant vector def 'vx0'. Having found 'vx0' we can generate
5044 the vector stmt VS2_0, and as usual, record it in the
5045 STMT_VINFO_VEC_STMT of stmt S2.
5046 When creating the second copy (VS2_1), we obtain the relevant vector
5047 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5048 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5049 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5050 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5051 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5052 chain of stmts and pointers:
5053 RELATED_STMT VEC_STMT
5054 VS1_0: vx0 = memref0 VS1_1 -
5055 VS1_1: vx1 = memref1 VS1_2 -
5056 VS1_2: vx2 = memref2 VS1_3 -
5057 VS1_3: vx3 = memref3 - -
5058 S1: x = load - VS1_0
5059 VS2_0: vz0 = vx0 + v1 VS2_1 -
5060 VS2_1: vz1 = vx1 + v1 VS2_2 -
5061 VS2_2: vz2 = vx2 + v1 VS2_3 -
5062 VS2_3: vz3 = vx3 + v1 - -
5063 S2: z = x + 1 - VS2_0 */
5065 prev_stmt_info
= NULL
;
5066 for (j
= 0; j
< ncopies
; j
++)
5071 if (op_type
== binary_op
|| op_type
== ternary_op
)
5072 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5075 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5077 if (op_type
== ternary_op
)
5079 vec_oprnds2
.create (1);
5080 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
5086 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5087 if (op_type
== ternary_op
)
5089 tree vec_oprnd
= vec_oprnds2
.pop ();
5090 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5095 /* Arguments are ready. Create the new vector stmt. */
5096 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5098 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5099 ? vec_oprnds1
[i
] : NULL_TREE
);
5100 vop2
= ((op_type
== ternary_op
)
5101 ? vec_oprnds2
[i
] : NULL_TREE
);
5102 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5103 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5104 gimple_assign_set_lhs (new_stmt
, new_temp
);
5105 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5107 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5114 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5116 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5117 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5120 vec_oprnds0
.release ();
5121 vec_oprnds1
.release ();
5122 vec_oprnds2
.release ();
5127 /* A helper function to ensure data reference DR's base alignment
5131 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5136 if (DR_VECT_AUX (dr
)->base_misaligned
)
5138 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5139 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5141 if (decl_in_symtab_p (base_decl
))
5142 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5145 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5146 DECL_USER_ALIGN (base_decl
) = 1;
5148 DR_VECT_AUX (dr
)->base_misaligned
= false;
5153 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5154 reversal of the vector elements. If that is impossible to do,
5158 perm_mask_for_reverse (tree vectype
)
5163 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5164 sel
= XALLOCAVEC (unsigned char, nunits
);
5166 for (i
= 0; i
< nunits
; ++i
)
5167 sel
[i
] = nunits
- 1 - i
;
5169 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5171 return vect_gen_perm_mask_checked (vectype
, sel
);
5174 /* Function vectorizable_store.
5176 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5179 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5183 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5189 tree vec_oprnd
= NULL_TREE
;
5190 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5191 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5193 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5194 struct loop
*loop
= NULL
;
5195 machine_mode vec_mode
;
5197 enum dr_alignment_support alignment_support_scheme
;
5199 enum vect_def_type dt
;
5200 stmt_vec_info prev_stmt_info
= NULL
;
5201 tree dataref_ptr
= NULL_TREE
;
5202 tree dataref_offset
= NULL_TREE
;
5203 gimple
*ptr_incr
= NULL
;
5206 gimple
*next_stmt
, *first_stmt
= NULL
;
5207 bool grouped_store
= false;
5208 bool store_lanes_p
= false;
5209 unsigned int group_size
, i
;
5210 vec
<tree
> dr_chain
= vNULL
;
5211 vec
<tree
> oprnds
= vNULL
;
5212 vec
<tree
> result_chain
= vNULL
;
5214 bool negative
= false;
5215 tree offset
= NULL_TREE
;
5216 vec
<tree
> vec_oprnds
= vNULL
;
5217 bool slp
= (slp_node
!= NULL
);
5218 unsigned int vec_num
;
5219 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5220 vec_info
*vinfo
= stmt_info
->vinfo
;
5222 tree scatter_base
= NULL_TREE
, scatter_off
= NULL_TREE
;
5223 tree scatter_off_vectype
= NULL_TREE
, scatter_decl
= NULL_TREE
;
5224 int scatter_scale
= 1;
5225 enum vect_def_type scatter_idx_dt
= vect_unknown_def_type
;
5226 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5229 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5232 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5236 /* Is vectorizable store? */
5238 if (!is_gimple_assign (stmt
))
5241 scalar_dest
= gimple_assign_lhs (stmt
);
5242 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5243 && is_pattern_stmt_p (stmt_info
))
5244 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5245 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5246 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5247 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5248 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5249 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5250 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5251 && TREE_CODE (scalar_dest
) != MEM_REF
)
5254 /* Cannot have hybrid store SLP -- that would mean storing to the
5255 same location twice. */
5256 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5258 gcc_assert (gimple_assign_single_p (stmt
));
5260 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5261 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5264 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5266 /* Multiple types in SLP are handled by creating the appropriate number of
5267 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5272 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5274 gcc_assert (ncopies
>= 1);
5276 /* FORNOW. This restriction should be relaxed. */
5277 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5281 "multiple types in nested loop.\n");
5285 op
= gimple_assign_rhs1 (stmt
);
5287 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5289 if (dump_enabled_p ())
5290 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5291 "use not simple.\n");
5295 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5298 elem_type
= TREE_TYPE (vectype
);
5299 vec_mode
= TYPE_MODE (vectype
);
5301 /* FORNOW. In some cases can vectorize even if data-type not supported
5302 (e.g. - array initialization with 0). */
5303 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5306 if (!STMT_VINFO_DATA_REF (stmt_info
))
5309 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5312 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5313 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5314 size_zero_node
) < 0;
5315 if (negative
&& ncopies
> 1)
5317 if (dump_enabled_p ())
5318 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5319 "multiple types with negative step.\n");
5324 gcc_assert (!grouped_store
);
5325 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5326 if (alignment_support_scheme
!= dr_aligned
5327 && alignment_support_scheme
!= dr_unaligned_supported
)
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5331 "negative step but alignment required.\n");
5334 if (dt
!= vect_constant_def
5335 && dt
!= vect_external_def
5336 && !perm_mask_for_reverse (vectype
))
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5340 "negative step and reversing not supported.\n");
5346 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5348 grouped_store
= true;
5349 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5350 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5351 if (!slp
&& !STMT_VINFO_STRIDED_P (stmt_info
))
5353 if (vect_store_lanes_supported (vectype
, group_size
))
5354 store_lanes_p
= true;
5355 else if (!vect_grouped_store_supported (vectype
, group_size
))
5359 if (STMT_VINFO_STRIDED_P (stmt_info
)
5361 && (group_size
> nunits
5362 || nunits
% group_size
!= 0))
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5365 "unhandled strided group store\n");
5369 if (first_stmt
== stmt
)
5371 /* STMT is the leader of the group. Check the operands of all the
5372 stmts of the group. */
5373 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5376 gcc_assert (gimple_assign_single_p (next_stmt
));
5377 op
= gimple_assign_rhs1 (next_stmt
);
5378 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5382 "use not simple.\n");
5385 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5390 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5393 scatter_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &scatter_base
,
5394 &scatter_off
, &scatter_scale
);
5395 gcc_assert (scatter_decl
);
5396 if (!vect_is_simple_use (scatter_off
, vinfo
, &def_stmt
, &scatter_idx_dt
,
5397 &scatter_off_vectype
))
5399 if (dump_enabled_p ())
5400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5401 "scatter index use not simple.");
5406 if (!vec_stmt
) /* transformation not required. */
5408 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5409 /* The SLP costs are calculated during SLP analysis. */
5410 if (!PURE_SLP_STMT (stmt_info
))
5411 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5418 ensure_base_align (stmt_info
, dr
);
5420 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5422 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5423 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (scatter_decl
));
5424 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5425 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5426 edge pe
= loop_preheader_edge (loop
);
5429 enum { NARROW
, NONE
, WIDEN
} modifier
;
5430 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (scatter_off_vectype
);
5432 if (nunits
== (unsigned int) scatter_off_nunits
)
5434 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5436 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5439 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5440 sel
[i
] = i
| nunits
;
5442 perm_mask
= vect_gen_perm_mask_checked (scatter_off_vectype
, sel
);
5443 gcc_assert (perm_mask
!= NULL_TREE
);
5445 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5447 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5450 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5451 sel
[i
] = i
| scatter_off_nunits
;
5453 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5454 gcc_assert (perm_mask
!= NULL_TREE
);
5460 rettype
= TREE_TYPE (TREE_TYPE (scatter_decl
));
5461 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5462 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5463 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5464 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5465 scaletype
= TREE_VALUE (arglist
);
5467 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5468 && TREE_CODE (rettype
) == VOID_TYPE
);
5470 ptr
= fold_convert (ptrtype
, scatter_base
);
5471 if (!is_gimple_min_invariant (ptr
))
5473 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5474 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5475 gcc_assert (!new_bb
);
5478 /* Currently we support only unconditional scatter stores,
5479 so mask should be all ones. */
5480 mask
= build_int_cst (masktype
, -1);
5481 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5483 scale
= build_int_cst (scaletype
, scatter_scale
);
5485 prev_stmt_info
= NULL
;
5486 for (j
= 0; j
< ncopies
; ++j
)
5491 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5493 = vect_get_vec_def_for_operand (scatter_off
, stmt
);
5495 else if (modifier
!= NONE
&& (j
& 1))
5497 if (modifier
== WIDEN
)
5500 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5501 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5504 else if (modifier
== NARROW
)
5506 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5509 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5517 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5519 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5522 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5524 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5525 == TYPE_VECTOR_SUBPARTS (srctype
));
5526 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5527 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5528 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5529 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5533 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5535 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5536 == TYPE_VECTOR_SUBPARTS (idxtype
));
5537 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5538 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5539 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5540 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5545 = gimple_build_call (scatter_decl
, 5, ptr
, mask
, op
, src
, scale
);
5547 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5549 if (prev_stmt_info
== NULL
)
5550 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5552 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5553 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5560 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5561 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5563 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5566 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5568 /* We vectorize all the stmts of the interleaving group when we
5569 reach the last stmt in the group. */
5570 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5571 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5580 grouped_store
= false;
5581 /* VEC_NUM is the number of vect stmts to be created for this
5583 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5584 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5585 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5586 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5587 op
= gimple_assign_rhs1 (first_stmt
);
5590 /* VEC_NUM is the number of vect stmts to be created for this
5592 vec_num
= group_size
;
5598 group_size
= vec_num
= 1;
5601 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_NOTE
, vect_location
,
5603 "transform store. ncopies = %d\n", ncopies
);
5605 if (STMT_VINFO_STRIDED_P (stmt_info
))
5607 gimple_stmt_iterator incr_gsi
;
5613 gimple_seq stmts
= NULL
;
5614 tree stride_base
, stride_step
, alias_off
;
5618 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5621 = fold_build_pointer_plus
5622 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5623 size_binop (PLUS_EXPR
,
5624 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5625 convert_to_ptrofftype (DR_INIT(first_dr
))));
5626 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5628 /* For a store with loop-invariant (but other than power-of-2)
5629 stride (i.e. not a grouped access) like so:
5631 for (i = 0; i < n; i += stride)
5634 we generate a new induction variable and new stores from
5635 the components of the (vectorized) rhs:
5637 for (j = 0; ; j += VF*stride)
5642 array[j + stride] = tmp2;
5646 unsigned nstores
= nunits
;
5647 tree ltype
= elem_type
;
5650 nstores
= nunits
/ group_size
;
5651 if (group_size
< nunits
)
5652 ltype
= build_vector_type (elem_type
, group_size
);
5655 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5656 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5660 ivstep
= stride_step
;
5661 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5662 build_int_cst (TREE_TYPE (ivstep
),
5663 ncopies
* nstores
));
5665 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5667 create_iv (stride_base
, ivstep
, NULL
,
5668 loop
, &incr_gsi
, insert_after
,
5670 incr
= gsi_stmt (incr_gsi
);
5671 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5673 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5675 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5677 prev_stmt_info
= NULL
;
5678 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5679 next_stmt
= first_stmt
;
5680 for (g
= 0; g
< group_size
; g
++)
5682 running_off
= offvar
;
5685 tree size
= TYPE_SIZE_UNIT (ltype
);
5686 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5688 tree newoff
= copy_ssa_name (running_off
, NULL
);
5689 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5691 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5692 running_off
= newoff
;
5694 for (j
= 0; j
< ncopies
; j
++)
5696 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5697 and first_stmt == stmt. */
5702 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5704 vec_oprnd
= vec_oprnds
[0];
5708 gcc_assert (gimple_assign_single_p (next_stmt
));
5709 op
= gimple_assign_rhs1 (next_stmt
);
5710 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5716 vec_oprnd
= vec_oprnds
[j
];
5719 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5720 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5724 for (i
= 0; i
< nstores
; i
++)
5726 tree newref
, newoff
;
5727 gimple
*incr
, *assign
;
5728 tree size
= TYPE_SIZE (ltype
);
5729 /* Extract the i'th component. */
5730 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5731 bitsize_int (i
), size
);
5732 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5735 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5739 newref
= build2 (MEM_REF
, ltype
,
5740 running_off
, alias_off
);
5742 /* And store it to *running_off. */
5743 assign
= gimple_build_assign (newref
, elem
);
5744 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5746 newoff
= copy_ssa_name (running_off
, NULL
);
5747 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5748 running_off
, stride_step
);
5749 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5751 running_off
= newoff
;
5752 if (g
== group_size
- 1
5755 if (j
== 0 && i
== 0)
5756 STMT_VINFO_VEC_STMT (stmt_info
)
5757 = *vec_stmt
= assign
;
5759 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5760 prev_stmt_info
= vinfo_for_stmt (assign
);
5764 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5769 dr_chain
.create (group_size
);
5770 oprnds
.create (group_size
);
5772 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5773 gcc_assert (alignment_support_scheme
);
5774 /* Targets with store-lane instructions must not require explicit
5776 gcc_assert (!store_lanes_p
5777 || alignment_support_scheme
== dr_aligned
5778 || alignment_support_scheme
== dr_unaligned_supported
);
5781 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5784 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5786 aggr_type
= vectype
;
5788 /* In case the vectorization factor (VF) is bigger than the number
5789 of elements that we can fit in a vectype (nunits), we have to generate
5790 more than one vector stmt - i.e - we need to "unroll" the
5791 vector stmt by a factor VF/nunits. For more details see documentation in
5792 vect_get_vec_def_for_copy_stmt. */
5794 /* In case of interleaving (non-unit grouped access):
5801 We create vectorized stores starting from base address (the access of the
5802 first stmt in the chain (S2 in the above example), when the last store stmt
5803 of the chain (S4) is reached:
5806 VS2: &base + vec_size*1 = vx0
5807 VS3: &base + vec_size*2 = vx1
5808 VS4: &base + vec_size*3 = vx3
5810 Then permutation statements are generated:
5812 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5813 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5816 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5817 (the order of the data-refs in the output of vect_permute_store_chain
5818 corresponds to the order of scalar stmts in the interleaving chain - see
5819 the documentation of vect_permute_store_chain()).
5821 In case of both multiple types and interleaving, above vector stores and
5822 permutation stmts are created for every copy. The result vector stmts are
5823 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5824 STMT_VINFO_RELATED_STMT for the next copies.
5827 prev_stmt_info
= NULL
;
5828 for (j
= 0; j
< ncopies
; j
++)
5835 /* Get vectorized arguments for SLP_NODE. */
5836 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5837 NULL
, slp_node
, -1);
5839 vec_oprnd
= vec_oprnds
[0];
5843 /* For interleaved stores we collect vectorized defs for all the
5844 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5845 used as an input to vect_permute_store_chain(), and OPRNDS as
5846 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5848 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5849 OPRNDS are of size 1. */
5850 next_stmt
= first_stmt
;
5851 for (i
= 0; i
< group_size
; i
++)
5853 /* Since gaps are not supported for interleaved stores,
5854 GROUP_SIZE is the exact number of stmts in the chain.
5855 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5856 there is no interleaving, GROUP_SIZE is 1, and only one
5857 iteration of the loop will be executed. */
5858 gcc_assert (next_stmt
5859 && gimple_assign_single_p (next_stmt
));
5860 op
= gimple_assign_rhs1 (next_stmt
);
5862 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5863 dr_chain
.quick_push (vec_oprnd
);
5864 oprnds
.quick_push (vec_oprnd
);
5865 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5869 /* We should have catched mismatched types earlier. */
5870 gcc_assert (useless_type_conversion_p (vectype
,
5871 TREE_TYPE (vec_oprnd
)));
5872 bool simd_lane_access_p
5873 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5874 if (simd_lane_access_p
5875 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5876 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5877 && integer_zerop (DR_OFFSET (first_dr
))
5878 && integer_zerop (DR_INIT (first_dr
))
5879 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5880 get_alias_set (DR_REF (first_dr
))))
5882 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5883 dataref_offset
= build_int_cst (reference_alias_ptr_type
5884 (DR_REF (first_dr
)), 0);
5889 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5890 simd_lane_access_p
? loop
: NULL
,
5891 offset
, &dummy
, gsi
, &ptr_incr
,
5892 simd_lane_access_p
, &inv_p
);
5893 gcc_assert (bb_vinfo
|| !inv_p
);
5897 /* For interleaved stores we created vectorized defs for all the
5898 defs stored in OPRNDS in the previous iteration (previous copy).
5899 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5900 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5902 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5903 OPRNDS are of size 1. */
5904 for (i
= 0; i
< group_size
; i
++)
5907 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
5908 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5909 dr_chain
[i
] = vec_oprnd
;
5910 oprnds
[i
] = vec_oprnd
;
5914 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5915 TYPE_SIZE_UNIT (aggr_type
));
5917 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5918 TYPE_SIZE_UNIT (aggr_type
));
5925 /* Combine all the vectors into an array. */
5926 vec_array
= create_vector_array (vectype
, vec_num
);
5927 for (i
= 0; i
< vec_num
; i
++)
5929 vec_oprnd
= dr_chain
[i
];
5930 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5934 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5935 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5936 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5937 gimple_call_set_lhs (new_stmt
, data_ref
);
5938 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5946 result_chain
.create (group_size
);
5948 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5952 next_stmt
= first_stmt
;
5953 for (i
= 0; i
< vec_num
; i
++)
5955 unsigned align
, misalign
;
5958 /* Bump the vector pointer. */
5959 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5963 vec_oprnd
= vec_oprnds
[i
];
5964 else if (grouped_store
)
5965 /* For grouped stores vectorized defs are interleaved in
5966 vect_permute_store_chain(). */
5967 vec_oprnd
= result_chain
[i
];
5969 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
5973 : build_int_cst (reference_alias_ptr_type
5974 (DR_REF (first_dr
)), 0));
5975 align
= TYPE_ALIGN_UNIT (vectype
);
5976 if (aligned_access_p (first_dr
))
5978 else if (DR_MISALIGNMENT (first_dr
) == -1)
5980 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
5981 align
= TYPE_ALIGN_UNIT (elem_type
);
5983 align
= get_object_alignment (DR_REF (first_dr
))
5986 TREE_TYPE (data_ref
)
5987 = build_aligned_type (TREE_TYPE (data_ref
),
5988 align
* BITS_PER_UNIT
);
5992 TREE_TYPE (data_ref
)
5993 = build_aligned_type (TREE_TYPE (data_ref
),
5994 TYPE_ALIGN (elem_type
));
5995 misalign
= DR_MISALIGNMENT (first_dr
);
5997 if (dataref_offset
== NULL_TREE
5998 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
5999 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6003 && dt
!= vect_constant_def
6004 && dt
!= vect_external_def
)
6006 tree perm_mask
= perm_mask_for_reverse (vectype
);
6008 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6010 tree new_temp
= make_ssa_name (perm_dest
);
6012 /* Generate the permute statement. */
6014 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6015 vec_oprnd
, perm_mask
);
6016 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6018 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6019 vec_oprnd
= new_temp
;
6022 /* Arguments are ready. Create the new vector stmt. */
6023 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6024 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6029 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6037 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6039 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6040 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6044 dr_chain
.release ();
6046 result_chain
.release ();
6047 vec_oprnds
.release ();
6052 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6053 VECTOR_CST mask. No checks are made that the target platform supports the
6054 mask, so callers may wish to test can_vec_perm_p separately, or use
6055 vect_gen_perm_mask_checked. */
6058 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6060 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
6063 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6065 mask_elt_type
= lang_hooks
.types
.type_for_mode
6066 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
6067 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6069 mask_elts
= XALLOCAVEC (tree
, nunits
);
6070 for (i
= nunits
- 1; i
>= 0; i
--)
6071 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
6072 mask_vec
= build_vector (mask_type
, mask_elts
);
6077 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6078 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6081 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
6083 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6084 return vect_gen_perm_mask_any (vectype
, sel
);
6087 /* Given a vector variable X and Y, that was generated for the scalar
6088 STMT, generate instructions to permute the vector elements of X and Y
6089 using permutation mask MASK_VEC, insert them at *GSI and return the
6090 permuted vector variable. */
6093 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6094 gimple_stmt_iterator
*gsi
)
6096 tree vectype
= TREE_TYPE (x
);
6097 tree perm_dest
, data_ref
;
6100 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6101 data_ref
= make_ssa_name (perm_dest
);
6103 /* Generate the permute statement. */
6104 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6105 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6110 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6111 inserting them on the loops preheader edge. Returns true if we
6112 were successful in doing so (and thus STMT can be moved then),
6113 otherwise returns false. */
6116 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6122 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6124 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6125 if (!gimple_nop_p (def_stmt
)
6126 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6128 /* Make sure we don't need to recurse. While we could do
6129 so in simple cases when there are more complex use webs
6130 we don't have an easy way to preserve stmt order to fulfil
6131 dependencies within them. */
6134 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6136 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6138 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6139 if (!gimple_nop_p (def_stmt2
)
6140 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6150 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6152 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6153 if (!gimple_nop_p (def_stmt
)
6154 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6156 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6157 gsi_remove (&gsi
, false);
6158 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6165 /* vectorizable_load.
6167 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6169 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6170 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6171 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6174 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6175 slp_tree slp_node
, slp_instance slp_node_instance
)
6178 tree vec_dest
= NULL
;
6179 tree data_ref
= NULL
;
6180 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6181 stmt_vec_info prev_stmt_info
;
6182 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6183 struct loop
*loop
= NULL
;
6184 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6185 bool nested_in_vect_loop
= false;
6186 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6190 gimple
*new_stmt
= NULL
;
6192 enum dr_alignment_support alignment_support_scheme
;
6193 tree dataref_ptr
= NULL_TREE
;
6194 tree dataref_offset
= NULL_TREE
;
6195 gimple
*ptr_incr
= NULL
;
6197 int i
, j
, group_size
= -1, group_gap_adj
;
6198 tree msq
= NULL_TREE
, lsq
;
6199 tree offset
= NULL_TREE
;
6200 tree byte_offset
= NULL_TREE
;
6201 tree realignment_token
= NULL_TREE
;
6203 vec
<tree
> dr_chain
= vNULL
;
6204 bool grouped_load
= false;
6205 bool load_lanes_p
= false;
6207 gimple
*first_stmt_for_drptr
= NULL
;
6209 bool negative
= false;
6210 bool compute_in_loop
= false;
6211 struct loop
*at_loop
;
6213 bool slp
= (slp_node
!= NULL
);
6214 bool slp_perm
= false;
6215 enum tree_code code
;
6216 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6219 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
6220 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
6221 int gather_scale
= 1;
6222 enum vect_def_type gather_dt
= vect_unknown_def_type
;
6223 vec_info
*vinfo
= stmt_info
->vinfo
;
6225 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6228 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6232 /* Is vectorizable load? */
6233 if (!is_gimple_assign (stmt
))
6236 scalar_dest
= gimple_assign_lhs (stmt
);
6237 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6240 code
= gimple_assign_rhs_code (stmt
);
6241 if (code
!= ARRAY_REF
6242 && code
!= BIT_FIELD_REF
6243 && code
!= INDIRECT_REF
6244 && code
!= COMPONENT_REF
6245 && code
!= IMAGPART_EXPR
6246 && code
!= REALPART_EXPR
6248 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6251 if (!STMT_VINFO_DATA_REF (stmt_info
))
6254 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6255 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6259 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6260 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6261 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6266 /* Multiple types in SLP are handled by creating the appropriate number of
6267 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6272 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6274 gcc_assert (ncopies
>= 1);
6276 /* FORNOW. This restriction should be relaxed. */
6277 if (nested_in_vect_loop
&& ncopies
> 1)
6279 if (dump_enabled_p ())
6280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6281 "multiple types in nested loop.\n");
6285 /* Invalidate assumptions made by dependence analysis when vectorization
6286 on the unrolled body effectively re-orders stmts. */
6288 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6289 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6290 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6292 if (dump_enabled_p ())
6293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6294 "cannot perform implicit CSE when unrolling "
6295 "with negative dependence distance\n");
6299 elem_type
= TREE_TYPE (vectype
);
6300 mode
= TYPE_MODE (vectype
);
6302 /* FORNOW. In some cases can vectorize even if data-type not supported
6303 (e.g. - data copies). */
6304 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6308 "Aligned load, but unsupported type.\n");
6312 /* Check if the load is a part of an interleaving chain. */
6313 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6315 grouped_load
= true;
6317 gcc_assert (!nested_in_vect_loop
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6319 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6320 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6322 if (!slp
&& !STMT_VINFO_STRIDED_P (stmt_info
))
6324 if (vect_load_lanes_supported (vectype
, group_size
))
6325 load_lanes_p
= true;
6326 else if (!vect_grouped_load_supported (vectype
, group_size
))
6330 /* If this is single-element interleaving with an element distance
6331 that leaves unused vector loads around punt - we at least create
6332 very sub-optimal code in that case (and blow up memory,
6334 if (first_stmt
== stmt
6335 && !GROUP_NEXT_ELEMENT (stmt_info
))
6337 if (GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
6339 if (dump_enabled_p ())
6340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6341 "single-element interleaving not supported "
6342 "for not adjacent vector loads\n");
6346 /* Single-element interleaving requires peeling for gaps. */
6347 gcc_assert (GROUP_GAP (stmt_info
));
6350 /* If there is a gap in the end of the group or the group size cannot
6351 be made a multiple of the vector element count then we access excess
6352 elements in the last iteration and thus need to peel that off. */
6354 && ! STMT_VINFO_STRIDED_P (stmt_info
)
6355 && (GROUP_GAP (vinfo_for_stmt (first_stmt
)) != 0
6356 || (!slp
&& !load_lanes_p
&& vf
% group_size
!= 0)))
6358 if (dump_enabled_p ())
6359 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6360 "Data access with gaps requires scalar "
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6366 "Peeling for outer loop is not supported\n");
6370 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
6373 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6376 /* ??? The following is overly pessimistic (as well as the loop
6377 case above) in the case we can statically determine the excess
6378 elements loaded are within the bounds of a decl that is accessed.
6379 Likewise for BB vectorizations using masked loads is a possibility. */
6380 if (bb_vinfo
&& slp_perm
&& group_size
% nunits
!= 0)
6382 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6383 "BB vectorization with gaps at the end of a load "
6384 "is not supported\n");
6388 /* Invalidate assumptions made by dependence analysis when vectorization
6389 on the unrolled body effectively re-orders stmts. */
6390 if (!PURE_SLP_STMT (stmt_info
)
6391 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6392 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6393 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6395 if (dump_enabled_p ())
6396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6397 "cannot perform implicit CSE when performing "
6398 "group loads with negative dependence distance\n");
6402 /* Similarly when the stmt is a load that is both part of a SLP
6403 instance and a loop vectorized stmt via the same-dr mechanism
6404 we have to give up. */
6405 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6406 && (STMT_SLP_TYPE (stmt_info
)
6407 != STMT_SLP_TYPE (vinfo_for_stmt
6408 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6410 if (dump_enabled_p ())
6411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6412 "conflicting SLP types for CSEd load\n");
6418 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6421 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
6422 &gather_off
, &gather_scale
);
6423 gcc_assert (gather_decl
);
6424 if (!vect_is_simple_use (gather_off
, vinfo
, &def_stmt
, &gather_dt
,
6425 &gather_off_vectype
))
6427 if (dump_enabled_p ())
6428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6429 "gather index use not simple.\n");
6433 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6437 && (group_size
> nunits
6438 || nunits
% group_size
!= 0))
6440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6441 "unhandled strided group load\n");
6447 negative
= tree_int_cst_compare (nested_in_vect_loop
6448 ? STMT_VINFO_DR_STEP (stmt_info
)
6450 size_zero_node
) < 0;
6451 if (negative
&& ncopies
> 1)
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6455 "multiple types with negative step.\n");
6463 if (dump_enabled_p ())
6464 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6465 "negative step for group load not supported"
6469 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6470 if (alignment_support_scheme
!= dr_aligned
6471 && alignment_support_scheme
!= dr_unaligned_supported
)
6473 if (dump_enabled_p ())
6474 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6475 "negative step but alignment required.\n");
6478 if (!perm_mask_for_reverse (vectype
))
6480 if (dump_enabled_p ())
6481 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6482 "negative step and reversing not supported."
6489 if (!vec_stmt
) /* transformation not required. */
6491 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6492 /* The SLP costs are calculated during SLP analysis. */
6493 if (!PURE_SLP_STMT (stmt_info
))
6494 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6499 if (dump_enabled_p ())
6500 dump_printf_loc (MSG_NOTE
, vect_location
,
6501 "transform load. ncopies = %d\n", ncopies
);
6505 ensure_base_align (stmt_info
, dr
);
6507 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6509 tree vec_oprnd0
= NULL_TREE
, op
;
6510 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6511 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6512 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6513 edge pe
= loop_preheader_edge (loop
);
6516 enum { NARROW
, NONE
, WIDEN
} modifier
;
6517 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6519 if (nunits
== gather_off_nunits
)
6521 else if (nunits
== gather_off_nunits
/ 2)
6523 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6526 for (i
= 0; i
< gather_off_nunits
; ++i
)
6527 sel
[i
] = i
| nunits
;
6529 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6531 else if (nunits
== gather_off_nunits
* 2)
6533 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6536 for (i
= 0; i
< nunits
; ++i
)
6537 sel
[i
] = i
< gather_off_nunits
6538 ? i
: i
+ nunits
- gather_off_nunits
;
6540 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6546 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6547 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6548 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6549 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6550 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6551 scaletype
= TREE_VALUE (arglist
);
6552 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6554 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6556 ptr
= fold_convert (ptrtype
, gather_base
);
6557 if (!is_gimple_min_invariant (ptr
))
6559 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6560 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6561 gcc_assert (!new_bb
);
6564 /* Currently we support only unconditional gather loads,
6565 so mask should be all ones. */
6566 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6567 mask
= build_int_cst (masktype
, -1);
6568 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6570 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6571 mask
= build_vector_from_val (masktype
, mask
);
6572 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6574 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6578 for (j
= 0; j
< 6; ++j
)
6580 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6581 mask
= build_real (TREE_TYPE (masktype
), r
);
6582 mask
= build_vector_from_val (masktype
, mask
);
6583 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6588 scale
= build_int_cst (scaletype
, gather_scale
);
6590 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6591 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6596 for (j
= 0; j
< 6; ++j
)
6598 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6599 merge
= build_real (TREE_TYPE (rettype
), r
);
6603 merge
= build_vector_from_val (rettype
, merge
);
6604 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6606 prev_stmt_info
= NULL
;
6607 for (j
= 0; j
< ncopies
; ++j
)
6609 if (modifier
== WIDEN
&& (j
& 1))
6610 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6611 perm_mask
, stmt
, gsi
);
6614 = vect_get_vec_def_for_operand (gather_off
, stmt
);
6617 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6619 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6621 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6622 == TYPE_VECTOR_SUBPARTS (idxtype
));
6623 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6624 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6626 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6627 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6632 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6634 if (!useless_type_conversion_p (vectype
, rettype
))
6636 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6637 == TYPE_VECTOR_SUBPARTS (rettype
));
6638 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6639 gimple_call_set_lhs (new_stmt
, op
);
6640 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6641 var
= make_ssa_name (vec_dest
);
6642 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6644 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6648 var
= make_ssa_name (vec_dest
, new_stmt
);
6649 gimple_call_set_lhs (new_stmt
, var
);
6652 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6654 if (modifier
== NARROW
)
6661 var
= permute_vec_elements (prev_res
, var
,
6662 perm_mask
, stmt
, gsi
);
6663 new_stmt
= SSA_NAME_DEF_STMT (var
);
6666 if (prev_stmt_info
== NULL
)
6667 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6669 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6670 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6674 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6676 gimple_stmt_iterator incr_gsi
;
6682 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6683 gimple_seq stmts
= NULL
;
6684 tree stride_base
, stride_step
, alias_off
;
6686 gcc_assert (!nested_in_vect_loop
);
6688 if (slp
&& grouped_load
)
6689 first_dr
= STMT_VINFO_DATA_REF
6690 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6695 = fold_build_pointer_plus
6696 (DR_BASE_ADDRESS (first_dr
),
6697 size_binop (PLUS_EXPR
,
6698 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6699 convert_to_ptrofftype (DR_INIT (first_dr
))));
6700 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6702 /* For a load with loop-invariant (but other than power-of-2)
6703 stride (i.e. not a grouped access) like so:
6705 for (i = 0; i < n; i += stride)
6708 we generate a new induction variable and new accesses to
6709 form a new vector (or vectors, depending on ncopies):
6711 for (j = 0; ; j += VF*stride)
6713 tmp2 = array[j + stride];
6715 vectemp = {tmp1, tmp2, ...}
6718 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6719 build_int_cst (TREE_TYPE (stride_step
), vf
));
6721 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6723 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6724 loop
, &incr_gsi
, insert_after
,
6726 incr
= gsi_stmt (incr_gsi
);
6727 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6729 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6730 &stmts
, true, NULL_TREE
);
6732 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6734 prev_stmt_info
= NULL
;
6735 running_off
= offvar
;
6736 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6737 int nloads
= nunits
;
6738 tree ltype
= TREE_TYPE (vectype
);
6739 auto_vec
<tree
> dr_chain
;
6742 nloads
= nunits
/ group_size
;
6743 if (group_size
< nunits
)
6744 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6747 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6748 /* For SLP permutation support we need to load the whole group,
6749 not only the number of vector stmts the permutation result
6753 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
6754 dr_chain
.create (ncopies
);
6757 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6759 for (j
= 0; j
< ncopies
; j
++)
6765 vec_alloc (v
, nloads
);
6766 for (i
= 0; i
< nloads
; i
++)
6768 tree newref
, newoff
;
6770 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6772 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6775 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6776 newoff
= copy_ssa_name (running_off
);
6777 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6778 running_off
, stride_step
);
6779 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6781 running_off
= newoff
;
6784 vec_inv
= build_constructor (vectype
, v
);
6785 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6786 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6790 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6791 build2 (MEM_REF
, ltype
,
6792 running_off
, alias_off
));
6793 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6795 tree newoff
= copy_ssa_name (running_off
);
6796 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6797 running_off
, stride_step
);
6798 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6800 running_off
= newoff
;
6806 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6808 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6813 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6815 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6816 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6820 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6821 slp_node_instance
, false);
6827 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6828 /* For SLP vectorization we directly vectorize a subchain
6829 without permutation. */
6830 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6831 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6832 /* For BB vectorization always use the first stmt to base
6833 the data ref pointer on. */
6835 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6837 /* Check if the chain of loads is already vectorized. */
6838 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6839 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6840 ??? But we can only do so if there is exactly one
6841 as we have no way to get at the rest. Leave the CSE
6843 ??? With the group load eventually participating
6844 in multiple different permutations (having multiple
6845 slp nodes which refer to the same group) the CSE
6846 is even wrong code. See PR56270. */
6849 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6852 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6853 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6856 /* VEC_NUM is the number of vect stmts to be created for this group. */
6859 grouped_load
= false;
6860 /* For SLP permutation support we need to load the whole group,
6861 not only the number of vector stmts the permutation result
6864 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6866 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6867 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6870 vec_num
= group_size
;
6876 group_size
= vec_num
= 1;
6880 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6881 gcc_assert (alignment_support_scheme
);
6882 /* Targets with load-lane instructions must not require explicit
6884 gcc_assert (!load_lanes_p
6885 || alignment_support_scheme
== dr_aligned
6886 || alignment_support_scheme
== dr_unaligned_supported
);
6888 /* In case the vectorization factor (VF) is bigger than the number
6889 of elements that we can fit in a vectype (nunits), we have to generate
6890 more than one vector stmt - i.e - we need to "unroll" the
6891 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6892 from one copy of the vector stmt to the next, in the field
6893 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6894 stages to find the correct vector defs to be used when vectorizing
6895 stmts that use the defs of the current stmt. The example below
6896 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6897 need to create 4 vectorized stmts):
6899 before vectorization:
6900 RELATED_STMT VEC_STMT
6904 step 1: vectorize stmt S1:
6905 We first create the vector stmt VS1_0, and, as usual, record a
6906 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6907 Next, we create the vector stmt VS1_1, and record a pointer to
6908 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6909 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6911 RELATED_STMT VEC_STMT
6912 VS1_0: vx0 = memref0 VS1_1 -
6913 VS1_1: vx1 = memref1 VS1_2 -
6914 VS1_2: vx2 = memref2 VS1_3 -
6915 VS1_3: vx3 = memref3 - -
6916 S1: x = load - VS1_0
6919 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6920 information we recorded in RELATED_STMT field is used to vectorize
6923 /* In case of interleaving (non-unit grouped access):
6930 Vectorized loads are created in the order of memory accesses
6931 starting from the access of the first stmt of the chain:
6934 VS2: vx1 = &base + vec_size*1
6935 VS3: vx3 = &base + vec_size*2
6936 VS4: vx4 = &base + vec_size*3
6938 Then permutation statements are generated:
6940 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6941 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6944 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6945 (the order of the data-refs in the output of vect_permute_load_chain
6946 corresponds to the order of scalar stmts in the interleaving chain - see
6947 the documentation of vect_permute_load_chain()).
6948 The generation of permutation stmts and recording them in
6949 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6951 In case of both multiple types and interleaving, the vector loads and
6952 permutation stmts above are created for every copy. The result vector
6953 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6954 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6956 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6957 on a target that supports unaligned accesses (dr_unaligned_supported)
6958 we generate the following code:
6962 p = p + indx * vectype_size;
6967 Otherwise, the data reference is potentially unaligned on a target that
6968 does not support unaligned accesses (dr_explicit_realign_optimized) -
6969 then generate the following code, in which the data in each iteration is
6970 obtained by two vector loads, one from the previous iteration, and one
6971 from the current iteration:
6973 msq_init = *(floor(p1))
6974 p2 = initial_addr + VS - 1;
6975 realignment_token = call target_builtin;
6978 p2 = p2 + indx * vectype_size
6980 vec_dest = realign_load (msq, lsq, realignment_token)
6985 /* If the misalignment remains the same throughout the execution of the
6986 loop, we can create the init_addr and permutation mask at the loop
6987 preheader. Otherwise, it needs to be created inside the loop.
6988 This can only occur when vectorizing memory accesses in the inner-loop
6989 nested within an outer-loop that is being vectorized. */
6991 if (nested_in_vect_loop
6992 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6993 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6995 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6996 compute_in_loop
= true;
6999 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7000 || alignment_support_scheme
== dr_explicit_realign
)
7001 && !compute_in_loop
)
7003 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7004 alignment_support_scheme
, NULL_TREE
,
7006 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7008 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7009 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7017 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7020 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7022 aggr_type
= vectype
;
7024 prev_stmt_info
= NULL
;
7025 for (j
= 0; j
< ncopies
; j
++)
7027 /* 1. Create the vector or array pointer update chain. */
7030 bool simd_lane_access_p
7031 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7032 if (simd_lane_access_p
7033 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7034 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7035 && integer_zerop (DR_OFFSET (first_dr
))
7036 && integer_zerop (DR_INIT (first_dr
))
7037 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7038 get_alias_set (DR_REF (first_dr
)))
7039 && (alignment_support_scheme
== dr_aligned
7040 || alignment_support_scheme
== dr_unaligned_supported
))
7042 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7043 dataref_offset
= build_int_cst (reference_alias_ptr_type
7044 (DR_REF (first_dr
)), 0);
7047 else if (first_stmt_for_drptr
7048 && first_stmt
!= first_stmt_for_drptr
)
7051 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7052 at_loop
, offset
, &dummy
, gsi
,
7053 &ptr_incr
, simd_lane_access_p
,
7054 &inv_p
, byte_offset
);
7055 /* Adjust the pointer by the difference to first_stmt. */
7056 data_reference_p ptrdr
7057 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7058 tree diff
= fold_convert (sizetype
,
7059 size_binop (MINUS_EXPR
,
7062 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7067 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7068 offset
, &dummy
, gsi
, &ptr_incr
,
7069 simd_lane_access_p
, &inv_p
,
7072 else if (dataref_offset
)
7073 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7074 TYPE_SIZE_UNIT (aggr_type
));
7076 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7077 TYPE_SIZE_UNIT (aggr_type
));
7079 if (grouped_load
|| slp_perm
)
7080 dr_chain
.create (vec_num
);
7086 vec_array
= create_vector_array (vectype
, vec_num
);
7089 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7090 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
7091 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7092 gimple_call_set_lhs (new_stmt
, vec_array
);
7093 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7095 /* Extract each vector into an SSA_NAME. */
7096 for (i
= 0; i
< vec_num
; i
++)
7098 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7100 dr_chain
.quick_push (new_temp
);
7103 /* Record the mapping between SSA_NAMEs and statements. */
7104 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7108 for (i
= 0; i
< vec_num
; i
++)
7111 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7114 /* 2. Create the vector-load in the loop. */
7115 switch (alignment_support_scheme
)
7118 case dr_unaligned_supported
:
7120 unsigned int align
, misalign
;
7123 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7126 : build_int_cst (reference_alias_ptr_type
7127 (DR_REF (first_dr
)), 0));
7128 align
= TYPE_ALIGN_UNIT (vectype
);
7129 if (alignment_support_scheme
== dr_aligned
)
7131 gcc_assert (aligned_access_p (first_dr
));
7134 else if (DR_MISALIGNMENT (first_dr
) == -1)
7136 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7137 align
= TYPE_ALIGN_UNIT (elem_type
);
7139 align
= (get_object_alignment (DR_REF (first_dr
))
7142 TREE_TYPE (data_ref
)
7143 = build_aligned_type (TREE_TYPE (data_ref
),
7144 align
* BITS_PER_UNIT
);
7148 TREE_TYPE (data_ref
)
7149 = build_aligned_type (TREE_TYPE (data_ref
),
7150 TYPE_ALIGN (elem_type
));
7151 misalign
= DR_MISALIGNMENT (first_dr
);
7153 if (dataref_offset
== NULL_TREE
7154 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7155 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7159 case dr_explicit_realign
:
7163 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7165 if (compute_in_loop
)
7166 msq
= vect_setup_realignment (first_stmt
, gsi
,
7168 dr_explicit_realign
,
7171 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7172 ptr
= copy_ssa_name (dataref_ptr
);
7174 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7175 new_stmt
= gimple_build_assign
7176 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7178 (TREE_TYPE (dataref_ptr
),
7179 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7180 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7182 = build2 (MEM_REF
, vectype
, ptr
,
7183 build_int_cst (reference_alias_ptr_type
7184 (DR_REF (first_dr
)), 0));
7185 vec_dest
= vect_create_destination_var (scalar_dest
,
7187 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7188 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7189 gimple_assign_set_lhs (new_stmt
, new_temp
);
7190 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7191 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7192 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7195 bump
= size_binop (MULT_EXPR
, vs
,
7196 TYPE_SIZE_UNIT (elem_type
));
7197 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7198 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7199 new_stmt
= gimple_build_assign
7200 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7203 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7204 ptr
= copy_ssa_name (ptr
, new_stmt
);
7205 gimple_assign_set_lhs (new_stmt
, ptr
);
7206 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7208 = build2 (MEM_REF
, vectype
, ptr
,
7209 build_int_cst (reference_alias_ptr_type
7210 (DR_REF (first_dr
)), 0));
7213 case dr_explicit_realign_optimized
:
7214 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7215 new_temp
= copy_ssa_name (dataref_ptr
);
7217 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7218 new_stmt
= gimple_build_assign
7219 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7221 (TREE_TYPE (dataref_ptr
),
7222 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7223 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7225 = build2 (MEM_REF
, vectype
, new_temp
,
7226 build_int_cst (reference_alias_ptr_type
7227 (DR_REF (first_dr
)), 0));
7232 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7233 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7234 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7235 gimple_assign_set_lhs (new_stmt
, new_temp
);
7236 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7238 /* 3. Handle explicit realignment if necessary/supported.
7240 vec_dest = realign_load (msq, lsq, realignment_token) */
7241 if (alignment_support_scheme
== dr_explicit_realign_optimized
7242 || alignment_support_scheme
== dr_explicit_realign
)
7244 lsq
= gimple_assign_lhs (new_stmt
);
7245 if (!realignment_token
)
7246 realignment_token
= dataref_ptr
;
7247 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7248 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7249 msq
, lsq
, realignment_token
);
7250 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7251 gimple_assign_set_lhs (new_stmt
, new_temp
);
7252 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7254 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7257 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7258 add_phi_arg (phi
, lsq
,
7259 loop_latch_edge (containing_loop
),
7265 /* 4. Handle invariant-load. */
7266 if (inv_p
&& !bb_vinfo
)
7268 gcc_assert (!grouped_load
);
7269 /* If we have versioned for aliasing or the loop doesn't
7270 have any data dependencies that would preclude this,
7271 then we are sure this is a loop invariant load and
7272 thus we can insert it on the preheader edge. */
7273 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7274 && !nested_in_vect_loop
7275 && hoist_defs_of_uses (stmt
, loop
))
7277 if (dump_enabled_p ())
7279 dump_printf_loc (MSG_NOTE
, vect_location
,
7280 "hoisting out of the vectorized "
7282 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7284 tree tem
= copy_ssa_name (scalar_dest
);
7285 gsi_insert_on_edge_immediate
7286 (loop_preheader_edge (loop
),
7287 gimple_build_assign (tem
,
7289 (gimple_assign_rhs1 (stmt
))));
7290 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7291 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7292 set_vinfo_for_stmt (new_stmt
,
7293 new_stmt_vec_info (new_stmt
, vinfo
));
7297 gimple_stmt_iterator gsi2
= *gsi
;
7299 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7301 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7307 tree perm_mask
= perm_mask_for_reverse (vectype
);
7308 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7309 perm_mask
, stmt
, gsi
);
7310 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7313 /* Collect vector loads and later create their permutation in
7314 vect_transform_grouped_load (). */
7315 if (grouped_load
|| slp_perm
)
7316 dr_chain
.quick_push (new_temp
);
7318 /* Store vector loads in the corresponding SLP_NODE. */
7319 if (slp
&& !slp_perm
)
7320 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7322 /* Bump the vector pointer to account for a gap or for excess
7323 elements loaded for a permuted SLP load. */
7324 if (group_gap_adj
!= 0)
7328 = wide_int_to_tree (sizetype
,
7329 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7330 group_gap_adj
, &ovf
));
7331 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7336 if (slp
&& !slp_perm
)
7341 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7342 slp_node_instance
, false))
7344 dr_chain
.release ();
7353 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7354 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7359 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7361 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7362 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7365 dr_chain
.release ();
7371 /* Function vect_is_simple_cond.
7374 LOOP - the loop that is being vectorized.
7375 COND - Condition that is checked for simple use.
7378 *COMP_VECTYPE - the vector type for the comparison.
7380 Returns whether a COND can be vectorized. Checks whether
7381 condition operands are supportable using vec_is_simple_use. */
7384 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7387 enum vect_def_type dt
;
7388 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7391 if (TREE_CODE (cond
) == SSA_NAME
7392 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7394 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7395 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7398 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7403 if (!COMPARISON_CLASS_P (cond
))
7406 lhs
= TREE_OPERAND (cond
, 0);
7407 rhs
= TREE_OPERAND (cond
, 1);
7409 if (TREE_CODE (lhs
) == SSA_NAME
)
7411 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7412 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7415 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7416 && TREE_CODE (lhs
) != FIXED_CST
)
7419 if (TREE_CODE (rhs
) == SSA_NAME
)
7421 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7422 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7425 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7426 && TREE_CODE (rhs
) != FIXED_CST
)
7429 if (vectype1
&& vectype2
7430 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7433 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7437 /* vectorizable_condition.
7439 Check if STMT is conditional modify expression that can be vectorized.
7440 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7441 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7444 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7445 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7446 else clause if it is 2).
7448 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7451 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7452 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7455 tree scalar_dest
= NULL_TREE
;
7456 tree vec_dest
= NULL_TREE
;
7457 tree cond_expr
, then_clause
, else_clause
;
7458 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7459 tree comp_vectype
= NULL_TREE
;
7460 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7461 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7464 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7465 enum vect_def_type dt
, dts
[4];
7467 enum tree_code code
;
7468 stmt_vec_info prev_stmt_info
= NULL
;
7470 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7471 vec
<tree
> vec_oprnds0
= vNULL
;
7472 vec
<tree
> vec_oprnds1
= vNULL
;
7473 vec
<tree
> vec_oprnds2
= vNULL
;
7474 vec
<tree
> vec_oprnds3
= vNULL
;
7476 bool masked
= false;
7478 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7481 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7483 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7486 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7487 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7491 /* FORNOW: not yet supported. */
7492 if (STMT_VINFO_LIVE_P (stmt_info
))
7494 if (dump_enabled_p ())
7495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7496 "value used after loop.\n");
7501 /* Is vectorizable conditional operation? */
7502 if (!is_gimple_assign (stmt
))
7505 code
= gimple_assign_rhs_code (stmt
);
7507 if (code
!= COND_EXPR
)
7510 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7511 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7512 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7517 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7519 gcc_assert (ncopies
>= 1);
7520 if (reduc_index
&& ncopies
> 1)
7521 return false; /* FORNOW */
7523 cond_expr
= gimple_assign_rhs1 (stmt
);
7524 then_clause
= gimple_assign_rhs2 (stmt
);
7525 else_clause
= gimple_assign_rhs3 (stmt
);
7527 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7532 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7535 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7539 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7542 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7545 masked
= !COMPARISON_CLASS_P (cond_expr
);
7546 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7548 if (vec_cmp_type
== NULL_TREE
)
7553 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7554 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7561 vec_oprnds0
.create (1);
7562 vec_oprnds1
.create (1);
7563 vec_oprnds2
.create (1);
7564 vec_oprnds3
.create (1);
7568 scalar_dest
= gimple_assign_lhs (stmt
);
7569 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7571 /* Handle cond expr. */
7572 for (j
= 0; j
< ncopies
; j
++)
7574 gassign
*new_stmt
= NULL
;
7579 auto_vec
<tree
, 4> ops
;
7580 auto_vec
<vec
<tree
>, 4> vec_defs
;
7583 ops
.safe_push (cond_expr
);
7586 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7587 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7589 ops
.safe_push (then_clause
);
7590 ops
.safe_push (else_clause
);
7591 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7592 vec_oprnds3
= vec_defs
.pop ();
7593 vec_oprnds2
= vec_defs
.pop ();
7595 vec_oprnds1
= vec_defs
.pop ();
7596 vec_oprnds0
= vec_defs
.pop ();
7599 vec_defs
.release ();
7607 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7609 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7615 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7616 stmt
, comp_vectype
);
7617 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7618 loop_vinfo
, >emp
, &dts
[0]);
7621 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7622 stmt
, comp_vectype
);
7623 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7624 loop_vinfo
, >emp
, &dts
[1]);
7626 if (reduc_index
== 1)
7627 vec_then_clause
= reduc_def
;
7630 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7632 vect_is_simple_use (then_clause
, loop_vinfo
,
7635 if (reduc_index
== 2)
7636 vec_else_clause
= reduc_def
;
7639 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7641 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7648 = vect_get_vec_def_for_stmt_copy (dts
[0],
7649 vec_oprnds0
.pop ());
7652 = vect_get_vec_def_for_stmt_copy (dts
[1],
7653 vec_oprnds1
.pop ());
7655 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7656 vec_oprnds2
.pop ());
7657 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7658 vec_oprnds3
.pop ());
7663 vec_oprnds0
.quick_push (vec_cond_lhs
);
7665 vec_oprnds1
.quick_push (vec_cond_rhs
);
7666 vec_oprnds2
.quick_push (vec_then_clause
);
7667 vec_oprnds3
.quick_push (vec_else_clause
);
7670 /* Arguments are ready. Create the new vector stmt. */
7671 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7673 vec_then_clause
= vec_oprnds2
[i
];
7674 vec_else_clause
= vec_oprnds3
[i
];
7677 vec_compare
= vec_cond_lhs
;
7680 vec_cond_rhs
= vec_oprnds1
[i
];
7681 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7682 vec_cond_lhs
, vec_cond_rhs
);
7684 new_temp
= make_ssa_name (vec_dest
);
7685 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
7686 vec_compare
, vec_then_clause
,
7688 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7690 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7697 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7699 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7701 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7704 vec_oprnds0
.release ();
7705 vec_oprnds1
.release ();
7706 vec_oprnds2
.release ();
7707 vec_oprnds3
.release ();
7712 /* vectorizable_comparison.
7714 Check if STMT is comparison expression that can be vectorized.
7715 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7716 comparison, put it in VEC_STMT, and insert it at GSI.
7718 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7721 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7722 gimple
**vec_stmt
, tree reduc_def
,
7725 tree lhs
, rhs1
, rhs2
;
7726 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7727 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7728 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7729 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7731 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7732 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7735 enum tree_code code
;
7736 stmt_vec_info prev_stmt_info
= NULL
;
7738 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7739 vec
<tree
> vec_oprnds0
= vNULL
;
7740 vec
<tree
> vec_oprnds1
= vNULL
;
7745 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7748 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
7751 mask_type
= vectype
;
7752 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7757 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7759 gcc_assert (ncopies
>= 1);
7760 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7761 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7765 if (STMT_VINFO_LIVE_P (stmt_info
))
7767 if (dump_enabled_p ())
7768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7769 "value used after loop.\n");
7773 if (!is_gimple_assign (stmt
))
7776 code
= gimple_assign_rhs_code (stmt
);
7778 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7781 rhs1
= gimple_assign_rhs1 (stmt
);
7782 rhs2
= gimple_assign_rhs2 (stmt
);
7784 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7785 &dts
[0], &vectype1
))
7788 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7789 &dts
[1], &vectype2
))
7792 if (vectype1
&& vectype2
7793 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7796 vectype
= vectype1
? vectype1
: vectype2
;
7798 /* Invariant comparison. */
7801 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
7802 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
7805 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
7810 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7811 vect_model_simple_cost (stmt_info
, ncopies
, dts
, NULL
, NULL
);
7812 return expand_vec_cmp_expr_p (vectype
, mask_type
);
7818 vec_oprnds0
.create (1);
7819 vec_oprnds1
.create (1);
7823 lhs
= gimple_assign_lhs (stmt
);
7824 mask
= vect_create_destination_var (lhs
, mask_type
);
7826 /* Handle cmp expr. */
7827 for (j
= 0; j
< ncopies
; j
++)
7829 gassign
*new_stmt
= NULL
;
7834 auto_vec
<tree
, 2> ops
;
7835 auto_vec
<vec
<tree
>, 2> vec_defs
;
7837 ops
.safe_push (rhs1
);
7838 ops
.safe_push (rhs2
);
7839 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7840 vec_oprnds1
= vec_defs
.pop ();
7841 vec_oprnds0
= vec_defs
.pop ();
7845 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
7846 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
7851 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
7852 vec_oprnds0
.pop ());
7853 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
7854 vec_oprnds1
.pop ());
7859 vec_oprnds0
.quick_push (vec_rhs1
);
7860 vec_oprnds1
.quick_push (vec_rhs2
);
7863 /* Arguments are ready. Create the new vector stmt. */
7864 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
7866 vec_rhs2
= vec_oprnds1
[i
];
7868 new_temp
= make_ssa_name (mask
);
7869 new_stmt
= gimple_build_assign (new_temp
, code
, vec_rhs1
, vec_rhs2
);
7870 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7872 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7879 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7881 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7883 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7886 vec_oprnds0
.release ();
7887 vec_oprnds1
.release ();
7892 /* Make sure the statement is vectorizable. */
7895 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
7897 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7898 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7899 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7901 tree scalar_type
, vectype
;
7902 gimple
*pattern_stmt
;
7903 gimple_seq pattern_def_seq
;
7905 if (dump_enabled_p ())
7907 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7908 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7911 if (gimple_has_volatile_ops (stmt
))
7913 if (dump_enabled_p ())
7914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7915 "not vectorized: stmt has volatile operands\n");
7920 /* Skip stmts that do not need to be vectorized. In loops this is expected
7922 - the COND_EXPR which is the loop exit condition
7923 - any LABEL_EXPRs in the loop
7924 - computations that are used only for array indexing or loop control.
7925 In basic blocks we only analyze statements that are a part of some SLP
7926 instance, therefore, all the statements are relevant.
7928 Pattern statement needs to be analyzed instead of the original statement
7929 if the original statement is not relevant. Otherwise, we analyze both
7930 statements. In basic blocks we are called from some SLP instance
7931 traversal, don't analyze pattern stmts instead, the pattern stmts
7932 already will be part of SLP instance. */
7934 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7935 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7936 && !STMT_VINFO_LIVE_P (stmt_info
))
7938 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7940 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7941 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7943 /* Analyze PATTERN_STMT instead of the original stmt. */
7944 stmt
= pattern_stmt
;
7945 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7946 if (dump_enabled_p ())
7948 dump_printf_loc (MSG_NOTE
, vect_location
,
7949 "==> examining pattern statement: ");
7950 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7955 if (dump_enabled_p ())
7956 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7961 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7964 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7965 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7967 /* Analyze PATTERN_STMT too. */
7968 if (dump_enabled_p ())
7970 dump_printf_loc (MSG_NOTE
, vect_location
,
7971 "==> examining pattern statement: ");
7972 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7975 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7979 if (is_pattern_stmt_p (stmt_info
)
7981 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7983 gimple_stmt_iterator si
;
7985 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7987 gimple
*pattern_def_stmt
= gsi_stmt (si
);
7988 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7989 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7991 /* Analyze def stmt of STMT if it's a pattern stmt. */
7992 if (dump_enabled_p ())
7994 dump_printf_loc (MSG_NOTE
, vect_location
,
7995 "==> examining pattern def statement: ");
7996 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7999 if (!vect_analyze_stmt (pattern_def_stmt
,
8000 need_to_vectorize
, node
))
8006 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8008 case vect_internal_def
:
8011 case vect_reduction_def
:
8012 case vect_nested_cycle
:
8013 gcc_assert (!bb_vinfo
8014 && (relevance
== vect_used_in_outer
8015 || relevance
== vect_used_in_outer_by_reduction
8016 || relevance
== vect_used_by_reduction
8017 || relevance
== vect_unused_in_scope
));
8020 case vect_induction_def
:
8021 case vect_constant_def
:
8022 case vect_external_def
:
8023 case vect_unknown_def_type
:
8030 gcc_assert (PURE_SLP_STMT (stmt_info
));
8032 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8033 if (dump_enabled_p ())
8035 dump_printf_loc (MSG_NOTE
, vect_location
,
8036 "get vectype for scalar type: ");
8037 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8038 dump_printf (MSG_NOTE
, "\n");
8041 vectype
= get_vectype_for_scalar_type (scalar_type
);
8044 if (dump_enabled_p ())
8046 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8047 "not SLPed: unsupported data-type ");
8048 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8050 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8055 if (dump_enabled_p ())
8057 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8058 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8059 dump_printf (MSG_NOTE
, "\n");
8062 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8065 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8067 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8068 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8069 || (is_gimple_call (stmt
)
8070 && gimple_call_lhs (stmt
) == NULL_TREE
));
8071 *need_to_vectorize
= true;
8074 if (PURE_SLP_STMT (stmt_info
) && !node
)
8076 dump_printf_loc (MSG_NOTE
, vect_location
,
8077 "handled only by SLP analysis\n");
8083 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8084 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8085 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8086 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8087 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8088 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8089 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8090 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8091 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8092 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8093 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8094 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8095 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8099 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8100 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8101 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8102 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8103 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8104 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8105 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8106 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8107 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8108 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8113 if (dump_enabled_p ())
8115 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8116 "not vectorized: relevant stmt not ");
8117 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8118 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8127 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8128 need extra handling, except for vectorizable reductions. */
8129 if (STMT_VINFO_LIVE_P (stmt_info
)
8130 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8131 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
8135 if (dump_enabled_p ())
8137 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8138 "not vectorized: live stmt not ");
8139 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8140 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8150 /* Function vect_transform_stmt.
8152 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8155 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8156 bool *grouped_store
, slp_tree slp_node
,
8157 slp_instance slp_node_instance
)
8159 bool is_store
= false;
8160 gimple
*vec_stmt
= NULL
;
8161 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8164 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8165 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8167 switch (STMT_VINFO_TYPE (stmt_info
))
8169 case type_demotion_vec_info_type
:
8170 case type_promotion_vec_info_type
:
8171 case type_conversion_vec_info_type
:
8172 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8176 case induc_vec_info_type
:
8177 gcc_assert (!slp_node
);
8178 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8182 case shift_vec_info_type
:
8183 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8187 case op_vec_info_type
:
8188 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8192 case assignment_vec_info_type
:
8193 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8197 case load_vec_info_type
:
8198 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8203 case store_vec_info_type
:
8204 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8206 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8208 /* In case of interleaving, the whole chain is vectorized when the
8209 last store in the chain is reached. Store stmts before the last
8210 one are skipped, and there vec_stmt_info shouldn't be freed
8212 *grouped_store
= true;
8213 if (STMT_VINFO_VEC_STMT (stmt_info
))
8220 case condition_vec_info_type
:
8221 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8225 case comparison_vec_info_type
:
8226 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8230 case call_vec_info_type
:
8231 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8232 stmt
= gsi_stmt (*gsi
);
8233 if (is_gimple_call (stmt
)
8234 && gimple_call_internal_p (stmt
)
8235 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8239 case call_simd_clone_vec_info_type
:
8240 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8241 stmt
= gsi_stmt (*gsi
);
8244 case reduc_vec_info_type
:
8245 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8250 if (!STMT_VINFO_LIVE_P (stmt_info
))
8252 if (dump_enabled_p ())
8253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8254 "stmt not supported.\n");
8259 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8260 This would break hybrid SLP vectorization. */
8262 gcc_assert (!vec_stmt
8263 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8265 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8266 is being vectorized, but outside the immediately enclosing loop. */
8268 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8269 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8270 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8271 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8272 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8273 || STMT_VINFO_RELEVANT (stmt_info
) ==
8274 vect_used_in_outer_by_reduction
))
8276 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8277 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8278 imm_use_iterator imm_iter
;
8279 use_operand_p use_p
;
8283 if (dump_enabled_p ())
8284 dump_printf_loc (MSG_NOTE
, vect_location
,
8285 "Record the vdef for outer-loop vectorization.\n");
8287 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8288 (to be used when vectorizing outer-loop stmts that use the DEF of
8290 if (gimple_code (stmt
) == GIMPLE_PHI
)
8291 scalar_dest
= PHI_RESULT (stmt
);
8293 scalar_dest
= gimple_assign_lhs (stmt
);
8295 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8297 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8299 exit_phi
= USE_STMT (use_p
);
8300 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8305 /* Handle stmts whose DEF is used outside the loop-nest that is
8306 being vectorized. */
8307 if (STMT_VINFO_LIVE_P (stmt_info
)
8308 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8310 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
8315 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8321 /* Remove a group of stores (for SLP or interleaving), free their
8325 vect_remove_stores (gimple
*first_stmt
)
8327 gimple
*next
= first_stmt
;
8329 gimple_stmt_iterator next_si
;
8333 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8335 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8336 if (is_pattern_stmt_p (stmt_info
))
8337 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8338 /* Free the attached stmt_vec_info and remove the stmt. */
8339 next_si
= gsi_for_stmt (next
);
8340 unlink_stmt_vdef (next
);
8341 gsi_remove (&next_si
, true);
8342 release_defs (next
);
8343 free_stmt_vec_info (next
);
8349 /* Function new_stmt_vec_info.
8351 Create and initialize a new stmt_vec_info struct for STMT. */
8354 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8357 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8359 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8360 STMT_VINFO_STMT (res
) = stmt
;
8362 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8363 STMT_VINFO_LIVE_P (res
) = false;
8364 STMT_VINFO_VECTYPE (res
) = NULL
;
8365 STMT_VINFO_VEC_STMT (res
) = NULL
;
8366 STMT_VINFO_VECTORIZABLE (res
) = true;
8367 STMT_VINFO_IN_PATTERN_P (res
) = false;
8368 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8369 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8370 STMT_VINFO_DATA_REF (res
) = NULL
;
8371 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8373 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8374 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8375 STMT_VINFO_DR_INIT (res
) = NULL
;
8376 STMT_VINFO_DR_STEP (res
) = NULL
;
8377 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
8379 if (gimple_code (stmt
) == GIMPLE_PHI
8380 && is_loop_header_bb_p (gimple_bb (stmt
)))
8381 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8383 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8385 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8386 STMT_SLP_TYPE (res
) = loop_vect
;
8387 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8389 GROUP_FIRST_ELEMENT (res
) = NULL
;
8390 GROUP_NEXT_ELEMENT (res
) = NULL
;
8391 GROUP_SIZE (res
) = 0;
8392 GROUP_STORE_COUNT (res
) = 0;
8393 GROUP_GAP (res
) = 0;
8394 GROUP_SAME_DR_STMT (res
) = NULL
;
8400 /* Create a hash table for stmt_vec_info. */
8403 init_stmt_vec_info_vec (void)
8405 gcc_assert (!stmt_vec_info_vec
.exists ());
8406 stmt_vec_info_vec
.create (50);
8410 /* Free hash table for stmt_vec_info. */
8413 free_stmt_vec_info_vec (void)
8417 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8419 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8420 gcc_assert (stmt_vec_info_vec
.exists ());
8421 stmt_vec_info_vec
.release ();
8425 /* Free stmt vectorization related info. */
8428 free_stmt_vec_info (gimple
*stmt
)
8430 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8435 /* Check if this statement has a related "pattern stmt"
8436 (introduced by the vectorizer during the pattern recognition
8437 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8439 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8441 stmt_vec_info patt_info
8442 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8445 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8446 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8447 gimple_set_bb (patt_stmt
, NULL
);
8448 tree lhs
= gimple_get_lhs (patt_stmt
);
8449 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8450 release_ssa_name (lhs
);
8453 gimple_stmt_iterator si
;
8454 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8456 gimple
*seq_stmt
= gsi_stmt (si
);
8457 gimple_set_bb (seq_stmt
, NULL
);
8458 lhs
= gimple_get_lhs (seq_stmt
);
8459 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8460 release_ssa_name (lhs
);
8461 free_stmt_vec_info (seq_stmt
);
8464 free_stmt_vec_info (patt_stmt
);
8468 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8469 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8470 set_vinfo_for_stmt (stmt
, NULL
);
8475 /* Function get_vectype_for_scalar_type_and_size.
8477 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8481 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8483 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8484 machine_mode simd_mode
;
8485 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8492 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8493 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8496 /* For vector types of elements whose mode precision doesn't
8497 match their types precision we use a element type of mode
8498 precision. The vectorization routines will have to make sure
8499 they support the proper result truncation/extension.
8500 We also make sure to build vector types with INTEGER_TYPE
8501 component type only. */
8502 if (INTEGRAL_TYPE_P (scalar_type
)
8503 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8504 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8505 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8506 TYPE_UNSIGNED (scalar_type
));
8508 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8509 When the component mode passes the above test simply use a type
8510 corresponding to that mode. The theory is that any use that
8511 would cause problems with this will disable vectorization anyway. */
8512 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8513 && !INTEGRAL_TYPE_P (scalar_type
))
8514 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8516 /* We can't build a vector type of elements with alignment bigger than
8518 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8519 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8520 TYPE_UNSIGNED (scalar_type
));
8522 /* If we felt back to using the mode fail if there was
8523 no scalar type for it. */
8524 if (scalar_type
== NULL_TREE
)
8527 /* If no size was supplied use the mode the target prefers. Otherwise
8528 lookup a vector mode of the specified size. */
8530 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8532 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
8533 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8537 vectype
= build_vector_type (scalar_type
, nunits
);
8539 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8540 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
8546 unsigned int current_vector_size
;
8548 /* Function get_vectype_for_scalar_type.
8550 Returns the vector type corresponding to SCALAR_TYPE as supported
8554 get_vectype_for_scalar_type (tree scalar_type
)
8557 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8558 current_vector_size
);
8560 && current_vector_size
== 0)
8561 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8565 /* Function get_mask_type_for_scalar_type.
8567 Returns the mask type corresponding to a result of comparison
8568 of vectors of specified SCALAR_TYPE as supported by target. */
8571 get_mask_type_for_scalar_type (tree scalar_type
)
8573 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
8578 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
8579 current_vector_size
);
8582 /* Function get_same_sized_vectype
8584 Returns a vector type corresponding to SCALAR_TYPE of size
8585 VECTOR_TYPE if supported by the target. */
8588 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8590 if (TREE_CODE (scalar_type
) == BOOLEAN_TYPE
)
8591 return build_same_sized_truth_vector_type (vector_type
);
8593 return get_vectype_for_scalar_type_and_size
8594 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8597 /* Function vect_is_simple_use.
8600 VINFO - the vect info of the loop or basic block that is being vectorized.
8601 OPERAND - operand in the loop or bb.
8603 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8604 DT - the type of definition
8606 Returns whether a stmt with OPERAND can be vectorized.
8607 For loops, supportable operands are constants, loop invariants, and operands
8608 that are defined by the current iteration of the loop. Unsupportable
8609 operands are those that are defined by a previous iteration of the loop (as
8610 is the case in reduction/induction computations).
8611 For basic blocks, supportable operands are constants and bb invariants.
8612 For now, operands defined outside the basic block are not supported. */
8615 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8616 gimple
**def_stmt
, enum vect_def_type
*dt
)
8619 *dt
= vect_unknown_def_type
;
8621 if (dump_enabled_p ())
8623 dump_printf_loc (MSG_NOTE
, vect_location
,
8624 "vect_is_simple_use: operand ");
8625 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8626 dump_printf (MSG_NOTE
, "\n");
8629 if (CONSTANT_CLASS_P (operand
))
8631 *dt
= vect_constant_def
;
8635 if (is_gimple_min_invariant (operand
))
8637 *dt
= vect_external_def
;
8641 if (TREE_CODE (operand
) != SSA_NAME
)
8643 if (dump_enabled_p ())
8644 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8649 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8651 *dt
= vect_external_def
;
8655 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8656 if (dump_enabled_p ())
8658 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8659 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8662 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
8663 *dt
= vect_external_def
;
8666 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8667 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8670 if (dump_enabled_p ())
8672 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8675 case vect_uninitialized_def
:
8676 dump_printf (MSG_NOTE
, "uninitialized\n");
8678 case vect_constant_def
:
8679 dump_printf (MSG_NOTE
, "constant\n");
8681 case vect_external_def
:
8682 dump_printf (MSG_NOTE
, "external\n");
8684 case vect_internal_def
:
8685 dump_printf (MSG_NOTE
, "internal\n");
8687 case vect_induction_def
:
8688 dump_printf (MSG_NOTE
, "induction\n");
8690 case vect_reduction_def
:
8691 dump_printf (MSG_NOTE
, "reduction\n");
8693 case vect_double_reduction_def
:
8694 dump_printf (MSG_NOTE
, "double reduction\n");
8696 case vect_nested_cycle
:
8697 dump_printf (MSG_NOTE
, "nested cycle\n");
8699 case vect_unknown_def_type
:
8700 dump_printf (MSG_NOTE
, "unknown\n");
8705 if (*dt
== vect_unknown_def_type
)
8707 if (dump_enabled_p ())
8708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8709 "Unsupported pattern.\n");
8713 switch (gimple_code (*def_stmt
))
8720 if (dump_enabled_p ())
8721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8722 "unsupported defining stmt:\n");
8729 /* Function vect_is_simple_use.
8731 Same as vect_is_simple_use but also determines the vector operand
8732 type of OPERAND and stores it to *VECTYPE. If the definition of
8733 OPERAND is vect_uninitialized_def, vect_constant_def or
8734 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8735 is responsible to compute the best suited vector type for the
8739 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8740 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
8742 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
8745 /* Now get a vector type if the def is internal, otherwise supply
8746 NULL_TREE and leave it up to the caller to figure out a proper
8747 type for the use stmt. */
8748 if (*dt
== vect_internal_def
8749 || *dt
== vect_induction_def
8750 || *dt
== vect_reduction_def
8751 || *dt
== vect_double_reduction_def
8752 || *dt
== vect_nested_cycle
)
8754 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8756 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8757 && !STMT_VINFO_RELEVANT (stmt_info
)
8758 && !STMT_VINFO_LIVE_P (stmt_info
))
8759 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8761 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8762 gcc_assert (*vectype
!= NULL_TREE
);
8764 else if (*dt
== vect_uninitialized_def
8765 || *dt
== vect_constant_def
8766 || *dt
== vect_external_def
)
8767 *vectype
= NULL_TREE
;
8775 /* Function supportable_widening_operation
8777 Check whether an operation represented by the code CODE is a
8778 widening operation that is supported by the target platform in
8779 vector form (i.e., when operating on arguments of type VECTYPE_IN
8780 producing a result of type VECTYPE_OUT).
8782 Widening operations we currently support are NOP (CONVERT), FLOAT
8783 and WIDEN_MULT. This function checks if these operations are supported
8784 by the target platform either directly (via vector tree-codes), or via
8788 - CODE1 and CODE2 are codes of vector operations to be used when
8789 vectorizing the operation, if available.
8790 - MULTI_STEP_CVT determines the number of required intermediate steps in
8791 case of multi-step conversion (like char->short->int - in that case
8792 MULTI_STEP_CVT will be 1).
8793 - INTERM_TYPES contains the intermediate type required to perform the
8794 widening operation (short in the above example). */
8797 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
8798 tree vectype_out
, tree vectype_in
,
8799 enum tree_code
*code1
, enum tree_code
*code2
,
8800 int *multi_step_cvt
,
8801 vec
<tree
> *interm_types
)
8803 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8804 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8805 struct loop
*vect_loop
= NULL
;
8806 machine_mode vec_mode
;
8807 enum insn_code icode1
, icode2
;
8808 optab optab1
, optab2
;
8809 tree vectype
= vectype_in
;
8810 tree wide_vectype
= vectype_out
;
8811 enum tree_code c1
, c2
;
8813 tree prev_type
, intermediate_type
;
8814 machine_mode intermediate_mode
, prev_mode
;
8815 optab optab3
, optab4
;
8817 *multi_step_cvt
= 0;
8819 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8823 case WIDEN_MULT_EXPR
:
8824 /* The result of a vectorized widening operation usually requires
8825 two vectors (because the widened results do not fit into one vector).
8826 The generated vector results would normally be expected to be
8827 generated in the same order as in the original scalar computation,
8828 i.e. if 8 results are generated in each vector iteration, they are
8829 to be organized as follows:
8830 vect1: [res1,res2,res3,res4],
8831 vect2: [res5,res6,res7,res8].
8833 However, in the special case that the result of the widening
8834 operation is used in a reduction computation only, the order doesn't
8835 matter (because when vectorizing a reduction we change the order of
8836 the computation). Some targets can take advantage of this and
8837 generate more efficient code. For example, targets like Altivec,
8838 that support widen_mult using a sequence of {mult_even,mult_odd}
8839 generate the following vectors:
8840 vect1: [res1,res3,res5,res7],
8841 vect2: [res2,res4,res6,res8].
8843 When vectorizing outer-loops, we execute the inner-loop sequentially
8844 (each vectorized inner-loop iteration contributes to VF outer-loop
8845 iterations in parallel). We therefore don't allow to change the
8846 order of the computation in the inner-loop during outer-loop
8848 /* TODO: Another case in which order doesn't *really* matter is when we
8849 widen and then contract again, e.g. (short)((int)x * y >> 8).
8850 Normally, pack_trunc performs an even/odd permute, whereas the
8851 repack from an even/odd expansion would be an interleave, which
8852 would be significantly simpler for e.g. AVX2. */
8853 /* In any case, in order to avoid duplicating the code below, recurse
8854 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8855 are properly set up for the caller. If we fail, we'll continue with
8856 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8858 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8859 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8860 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8861 stmt
, vectype_out
, vectype_in
,
8862 code1
, code2
, multi_step_cvt
,
8865 /* Elements in a vector with vect_used_by_reduction property cannot
8866 be reordered if the use chain with this property does not have the
8867 same operation. One such an example is s += a * b, where elements
8868 in a and b cannot be reordered. Here we check if the vector defined
8869 by STMT is only directly used in the reduction statement. */
8870 tree lhs
= gimple_assign_lhs (stmt
);
8871 use_operand_p dummy
;
8873 stmt_vec_info use_stmt_info
= NULL
;
8874 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8875 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8876 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8879 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8880 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8893 case VEC_WIDEN_MULT_EVEN_EXPR
:
8894 /* Support the recursion induced just above. */
8895 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8896 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8899 case WIDEN_LSHIFT_EXPR
:
8900 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8901 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8905 c1
= VEC_UNPACK_LO_EXPR
;
8906 c2
= VEC_UNPACK_HI_EXPR
;
8910 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8911 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8914 case FIX_TRUNC_EXPR
:
8915 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8916 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8917 computing the operation. */
8924 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8927 if (code
== FIX_TRUNC_EXPR
)
8929 /* The signedness is determined from output operand. */
8930 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8931 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8935 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8936 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8939 if (!optab1
|| !optab2
)
8942 vec_mode
= TYPE_MODE (vectype
);
8943 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8944 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8950 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8951 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8952 /* For scalar masks we may have different boolean
8953 vector types having the same QImode. Thus we
8954 add additional check for elements number. */
8955 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
8956 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
8957 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
8959 /* Check if it's a multi-step conversion that can be done using intermediate
8962 prev_type
= vectype
;
8963 prev_mode
= vec_mode
;
8965 if (!CONVERT_EXPR_CODE_P (code
))
8968 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8969 intermediate steps in promotion sequence. We try
8970 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8972 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8973 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8975 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8976 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
8979 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
8980 current_vector_size
);
8981 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
8986 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8987 TYPE_UNSIGNED (prev_type
));
8989 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8990 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8992 if (!optab3
|| !optab4
8993 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8994 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8995 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8996 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8997 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8998 == CODE_FOR_nothing
)
8999 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9000 == CODE_FOR_nothing
))
9003 interm_types
->quick_push (intermediate_type
);
9004 (*multi_step_cvt
)++;
9006 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9007 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9008 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9009 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9010 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9012 prev_type
= intermediate_type
;
9013 prev_mode
= intermediate_mode
;
9016 interm_types
->release ();
9021 /* Function supportable_narrowing_operation
9023 Check whether an operation represented by the code CODE is a
9024 narrowing operation that is supported by the target platform in
9025 vector form (i.e., when operating on arguments of type VECTYPE_IN
9026 and producing a result of type VECTYPE_OUT).
9028 Narrowing operations we currently support are NOP (CONVERT) and
9029 FIX_TRUNC. This function checks if these operations are supported by
9030 the target platform directly via vector tree-codes.
9033 - CODE1 is the code of a vector operation to be used when
9034 vectorizing the operation, if available.
9035 - MULTI_STEP_CVT determines the number of required intermediate steps in
9036 case of multi-step conversion (like int->short->char - in that case
9037 MULTI_STEP_CVT will be 1).
9038 - INTERM_TYPES contains the intermediate type required to perform the
9039 narrowing operation (short in the above example). */
9042 supportable_narrowing_operation (enum tree_code code
,
9043 tree vectype_out
, tree vectype_in
,
9044 enum tree_code
*code1
, int *multi_step_cvt
,
9045 vec
<tree
> *interm_types
)
9047 machine_mode vec_mode
;
9048 enum insn_code icode1
;
9049 optab optab1
, interm_optab
;
9050 tree vectype
= vectype_in
;
9051 tree narrow_vectype
= vectype_out
;
9053 tree intermediate_type
, prev_type
;
9054 machine_mode intermediate_mode
, prev_mode
;
9058 *multi_step_cvt
= 0;
9062 c1
= VEC_PACK_TRUNC_EXPR
;
9065 case FIX_TRUNC_EXPR
:
9066 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9070 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9071 tree code and optabs used for computing the operation. */
9078 if (code
== FIX_TRUNC_EXPR
)
9079 /* The signedness is determined from output operand. */
9080 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9082 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9087 vec_mode
= TYPE_MODE (vectype
);
9088 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9093 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9094 /* For scalar masks we may have different boolean
9095 vector types having the same QImode. Thus we
9096 add additional check for elements number. */
9097 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9098 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9099 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9101 /* Check if it's a multi-step conversion that can be done using intermediate
9103 prev_mode
= vec_mode
;
9104 prev_type
= vectype
;
9105 if (code
== FIX_TRUNC_EXPR
)
9106 uns
= TYPE_UNSIGNED (vectype_out
);
9108 uns
= TYPE_UNSIGNED (vectype
);
9110 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9111 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9112 costly than signed. */
9113 if (code
== FIX_TRUNC_EXPR
&& uns
)
9115 enum insn_code icode2
;
9118 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9120 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9121 if (interm_optab
!= unknown_optab
9122 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9123 && insn_data
[icode1
].operand
[0].mode
9124 == insn_data
[icode2
].operand
[0].mode
)
9127 optab1
= interm_optab
;
9132 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9133 intermediate steps in promotion sequence. We try
9134 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9135 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9136 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9138 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9139 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9142 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9143 current_vector_size
);
9144 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9149 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9151 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9154 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9155 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9156 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9157 == CODE_FOR_nothing
))
9160 interm_types
->quick_push (intermediate_type
);
9161 (*multi_step_cvt
)++;
9163 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9164 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9165 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9166 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9168 prev_mode
= intermediate_mode
;
9169 prev_type
= intermediate_type
;
9170 optab1
= interm_optab
;
9173 interm_types
->release ();