/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "plugin-api.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}

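/* Illustrative usage (not from the original sources; the cost vector name
   is hypothetical): during analysis a caller typically passes a cost
   vector, so the cost is only queued for later processing:

     unsigned estimate = record_stmt_cost (&body_cost_vec, 2, vector_stmt,
					   stmt_info, 0, vect_body);

   Passing a NULL cost vector instead hands the statement straight to the
   target's add_stmt_cost hook through the loop or basic-block cost data.  */
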
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

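/* For illustration (an assumption-based sketch, not original text): given
   TYPE vector(4) int[2] and a pointer SSA name ptr_7, the call above yields
   a reference equivalent to

     MEM[(vector(4) int[2] *)ptr_7]

   where the alias pointer type is taken from FIRST_DR so the new reference
   keeps the aliasing behavior of the original accesses it replaces.  */
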
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}

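/* Worked example (illustrative only, not part of the original comment):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- has a vdef: marked vect_used_in_scope
         s = s + b[i];
       }
     ... = s;                 <-- s used outside the loop

   The store makes its stmt relevant because it alters memory, while the
   out-of-loop use of s only sets *live_p for the stmt defining s (the use
   is expected in a loop-closed exit phi, as asserted above).  */
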
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

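/* Illustrative example (not part of the original comment): for the stmt
   a[i_4] = x_5, the use i_4 only feeds the array index inside the data
   reference, so this predicate returns false for it; for x_5, the stored
   value (the 'first case' above), it returns true, and the stmt defining
   x_5 therefore stays relevant for vectorization.  */
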
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	      case vect_unused_in_scope:
	        relevant = vect_used_by_reduction;
	        break;

	      case vect_used_by_reduction:
	        if (gimple_code (stmt) == GIMPLE_PHI)
                  break;
  	        /* fall through */

	      default:
	        if (dump_enabled_p ())
	          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			           "unsupported use of reduction.\n");
	        return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
                {
		  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

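/* Worked example (illustrative): an add with one loop-invariant operand
   and NCOPIES == 4 is costed above as 4 vector_stmt stmts in the loop
   body plus 1 vector_stmt in the prologue to set up the invariant.  */
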
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

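/* Worked example (illustrative): for a two-step demotion, PWR == 1, so the
   loop above adds vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3
   vec_promote_demote stmts.  The matching two-step promotion uses
   tmp = i + 1 and so adds vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6,
   reflecting that each widening step doubles the number of vectors
   produced.  */
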
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

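/* Worked example (illustrative): for an interleaved store group with
   GROUP_SIZE == 4 and NCOPIES == 1, the permute count above is
   1 * ceil_log2 (4) * 4 = 8 vec_perm stmts, charged to the first store
   of the group on top of the stores themselves.  */
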
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

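/* Worked example (illustrative): a strided load of a four-element vector
   with NCOPIES == 2 is costed above as 2 * 4 = 8 scalar_load stmts plus
   2 vec_construct stmts to assemble the loaded elements into vectors.  */
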
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

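/* For illustration (not original text; the SSA name is hypothetical):
   called with VAL = 5 and TYPE = vector(4) int and GSI == NULL, this
   emits in the loop preheader

     cst__1 = { 5, 5, 5, 5 };

   and returns the SSA name cst__1 for use in the vectorized stmt.  */
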
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt =  ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
						        vectype_in);
}

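/* For illustration (hypothetical example, not original text): on a target
   whose builtin_vectorized_function hook handles BUILT_IN_SQRT, a call
   'y = sqrt (x)' with matching two-element double vectypes in and out
   yields the decl of the target's vector sqrt builtin, while any call
   that reads or clobbers memory is rejected above before the hook is
   ever consulted.  */
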
static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);

/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
			      gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return false;

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
				       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
				 &def_stmt, &def, &gather_dt,
				 &gather_off_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "gather index use not simple.");
	  return false;
	}

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
	= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "masked gather with integer mask not supported.");
	  return false;
	}
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
				 ? STMT_VINFO_DR_STEP (stmt_info)
				 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
			   &def_stmt, &def, &dt))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
			       &def_stmt, &def, &dt))
	return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
	vect_model_store_cost (stmt_info, ncopies, false, dt,
			       NULL, NULL, NULL);
      else
	vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
			   && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
	modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
	  modifier = WIDEN;

	  for (i = 0; i < gather_off_nunits; ++i)
	    sel[i] = i | nunits;

	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
	}
      else if (nunits == gather_off_nunits * 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
	  modifier = NARROW;

	  for (i = 0; i < nunits; ++i)
	    sel[i] = i < gather_off_nunits
		     ? i : i + nunits - gather_off_nunits;

	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
	  ncopies *= 2;
	  for (i = 0; i < nunits; ++i)
	    sel[i] = i | gather_off_nunits;
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
	}
      else
	gcc_unreachable ();

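      /* Worked example (illustrative, not original text): with nunits == 8
	 and gather_off_nunits == 4 (the NARROW case), the selector built for
	 PERM_MASK is { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating two gather
	 results into one full vector, while the MASK_PERM_MASK selector
	 { 4, 5, 6, 7, 4, 5, 6, 7 } shifts the high mask elements into place
	 for the odd-numbered copies.  */
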
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
	{
	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
	{
	  if (modifier == WIDEN && (j & 1))
	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				       perm_mask, stmt, gsi);
	  else if (j == 0)
	    op = vec_oprnd0
	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
	  else
	    op = vec_oprnd0
	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
			  == TYPE_VECTOR_SUBPARTS (idxtype));
	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
	      var = make_ssa_name (var);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt
		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      op = var;
	    }

	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      else
		{
		  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
		}

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
			      == TYPE_VECTOR_SUBPARTS (masktype));
		  var = vect_get_new_vect_var (masktype, vect_simple_var,
					       NULL);
		  var = make_ssa_name (var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  new_stmt
		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mask_op = var;
		}
	    }

	  new_stmt
	    = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
				 scale);

	  if (!useless_type_conversion_p (vectype, rettype))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
			  == TYPE_VECTOR_SUBPARTS (rettype));
	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
	      op = make_ssa_name (var, new_stmt);
	      gimple_call_set_lhs (new_stmt, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      var = make_ssa_name (vec_dest);
	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	    }
	  else
	    {
	      var = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, var);
	    }

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (modifier == NARROW)
	    {
	      if ((j & 1) == 0)
		{
		  prev_res = var;
		  continue;
		}
	      var = permute_vec_elements (prev_res, var,
					  perm_mask, stmt, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (var);
	    }

	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
2062 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2064 tree lhs
= gimple_call_lhs (stmt
);
2065 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2066 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2067 set_vinfo_for_stmt (stmt
, NULL
);
2068 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2069 gsi_replace (gsi
, new_stmt
, true);
  tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
  prev_stmt_info = NULL;
  for (i = 0; i < ncopies; i++)
    {
      unsigned align, misalign;

      if (i == 0)
        {
          tree rhs = gimple_call_arg (stmt, 3);
          vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
          vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_rhs)));
          dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (!inv_p);
        }
      else
        {
          vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
                              &def, &dt);
          vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
          vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
                              &def, &dt);
          vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (vectype));
        }

      align = TYPE_ALIGN_UNIT (vectype);
      if (aligned_access_p (dr))
        misalign = 0;
      else if (DR_MISALIGNMENT (dr) == -1)
        {
          align = TYPE_ALIGN_UNIT (elem_type);
          misalign = 0;
        }
      else
        misalign = DR_MISALIGNMENT (dr);
      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                              misalign);
      new_stmt
        = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                      gimple_call_arg (stmt, 1),
                                      vec_mask, vec_rhs);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
      if (i == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
}
else
{
  tree vec_mask = NULL_TREE;
  prev_stmt_info = NULL;
  vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      unsigned align, misalign;

      if (i == 0)
        {
          vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
          dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (!inv_p);
        }
      else
        {
          vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
                              &def, &dt);
          vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (vectype));
        }

      align = TYPE_ALIGN_UNIT (vectype);
      if (aligned_access_p (dr))
        misalign = 0;
      else if (DR_MISALIGNMENT (dr) == -1)
        {
          align = TYPE_ALIGN_UNIT (elem_type);
          misalign = 0;
        }
      else
        misalign = DR_MISALIGNMENT (dr);
      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                              misalign);
      new_stmt
        = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                      gimple_call_arg (stmt, 1),
                                      vec_mask);
      gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
      vect_finish_stmt_generation (stmt, new_stmt, gsi);
      if (i == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
}
if (!is_store)
  {
    /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
       from the IL.  */
    tree lhs = gimple_call_lhs (stmt);
    new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
    set_vinfo_for_stmt (new_stmt, stmt_info);
    set_vinfo_for_stmt (stmt, NULL);
    STMT_VINFO_STMT (stmt_info) = new_stmt;
    gsi_replace (gsi, new_stmt, true);
  }

return true;
}
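
/* Illustrative example (editor's addition, not from the original
   sources): given a scalar loop such as

       for (i = 0; i < n; i++)
         if (b[i] > 0)
           a[i] = c[i];

   if-conversion rewrites the conditional load and store as MASK_LOAD
   and MASK_STORE internal calls, and the code above replaces each of
   those calls with one vector statement per copy, roughly

       mask = b_vec > { 0, 0, 0, 0 };
       t = MASK_LOAD (&c[i], align, mask);
       MASK_STORE (&a[i], align, mask, t);

   or with a masked gather builtin when the load is a gather.  */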
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_call ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
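
/* Illustrative example (editor's addition, not from the original
   sources): a loop such as

       for (i = 0; i < n; i++)
         a[i] = sqrtf (b[i]);

   is vectorized here only when vectorizable_function finds a target
   builtin with matching vector types, yielding one call per copy,
   e.g. a_vec = __builtin_ia32_sqrtps (b_vec) on x86.  The NARROW and
   WIDEN modifiers handle builtins whose input and output vectors hold
   different numbers of elements.  */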
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
};
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp, def;
  gimple def_stmt;
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;
  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);
  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &thisarginfo.dt,
                                 &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          arginfo.release ();
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;

      arginfo.quick_push (thisarginfo);
    }
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }

  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }
  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          {
            arginfo.release ();
            return false;
          }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if (bestn->simdclone->args[i].arg_type
            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt, NULL);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  size_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt, NULL);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }
      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                size_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber),
                                           gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}
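
/* Illustrative example (editor's addition, not from the original
   sources): with a declaration such as

       #pragma omp declare simd uniform(s) linear(i:1)
       float foo (float x, float s, int i);

       for (i = 0; i < n; i++)
         a[i] = foo (b[i], s, i);

   the scalar call is replaced by a call to the best-matching clone
   (mangled along the lines of _ZGVbN4vul_foo), passing the uniform
   argument as a scalar and rebuilding the linear argument from its
   base and step as done above.  The exact clone name depends on the
   target ABI.  */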
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
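
/* For illustration (editor's addition): a widening operation yields
   its result in two halves.  E.g. a V8HI x V8HI widening multiply
   produces two V4SI vectors:

       lo = WIDEN_MULT_LO_EXPR <vb, vc>;
       hi = WIDEN_MULT_HI_EXPR <vb, vc>;

   vect_gen_widened_results_half emits one such half per call.  */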
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
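
/* For illustration (editor's addition): a multi-step demotion from
   V4SI to V16QI packs pairs of vectors at each level:

       s0 = VEC_PACK_TRUNC_EXPR <i0, i1>;   V8HI
       s1 = VEC_PACK_TRUNC_EXPR <i2, i3>;   V8HI
       q  = VEC_PACK_TRUNC_EXPR <s0, s1>;   V16QI

   which is exactly what the recursion above implements.  */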
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
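
/* For illustration (editor's addition): promotion is the mirror
   image of demotion; each operand vector yields a lo/hi pair of wider
   vectors, so VEC_TMP above holds twice as many vectors as
   VEC_OPRNDS0 and becomes the operand set for the next step.  */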
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, codecvt1,
                                                      vop0);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest);
                    new_stmt = gimple_build_assign (new_temp, codecvt1,
                                                    vop0);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
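
/* Illustrative example (editor's addition, not from the original
   sources): a conversion loop such as

       for (i = 0; i < n; i++)
         d[i] = (double) f[i];

   takes the WIDEN path: each V4SF operand is converted into two V2DF
   result vectors, and NCOPIES is computed from nunits_in so that a
   full vectorization factor's worth of elements is produced.  */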
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
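
/* Illustrative example (editor's addition, not from the original
   sources): a copy with a sign-change such as

       for (i = 0; i < n; i++)
         u[i] = (unsigned int) s[i];

   changes neither the element count nor the vector size, so it is
   vectorized here as a plain vector copy through VIEW_CONVERT_EXPR.  */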
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.\n");
      return false;
    }
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.\n");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.\n");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.\n");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }
  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }
  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.\n");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }
  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  bool negative = false;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative =
    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                          ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                          size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return false;
    }

  if (negative)
    {
      gcc_assert (!grouped_store);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
          && alignment_support_scheme != dr_unaligned_supported)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step but alignment required.\n");
          return false;
        }
      if (dt != vect_constant_def
          && dt != vect_external_def
          && !perm_mask_for_reverse (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step and reversing not supported.\n");
          return false;
        }
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.\n");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL);
      return true;
    }

  /* Transform.  */

  ensure_base_align (stmt_info, dr);
  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (negative
                  && dt != vect_constant_def
                  && dt != vect_external_def)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest);

                  /* Generate the permute statement.  */
                  gimple perm_stmt
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
{
  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }

      /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
          && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
          && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "cannot perform implicit CSE when performing "
                             "group loads with negative dependence distance\n");
          return false;
        }

      /* Similarly when the stmt is a load that is both part of a SLP
         instance and a loop vectorized stmt via the same-dr mechanism
         we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
          && (STMT_SLP_TYPE (stmt_info)
              != STMT_SLP_TYPE (vinfo_for_stmt
                                 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "conflicting SLP types for CSEd load\n");
          return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /* Transform.  */

  ensure_base_align (stmt_info, dr);
  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE;
      tree prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        mask = build_int_cst (masktype, -1);
      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        {
          mask = build_int_cst (TREE_TYPE (masktype), -1);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else
        gcc_unreachable ();

      scale = build_int_cst (scaletype, gather_scale);

      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
        merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = 0;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
          merge = build_real (TREE_TYPE (rettype), r);
        }
      else
        gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);
      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...  */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ??? But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ??? With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
            slp_perm = true;
          group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
      else
        {
          vec_num = group_size;
          group_gap = 0;
        }
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  x2 = &base + 2
        S2:  x0 = &base
        S3:  x1 = &base + 1
        S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

        VS1: vx0 = &base
        VS2: vx1 = &base + vec_size*1
        VS3: vx3 = &base + vec_size*2
        VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
          ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
	 p2 = p2 + indx * vectype_size
	 lsq = *(floor(p2))
	 vec_dest = realign_load (msq, lsq, realignment_token)
	 indx = indx + 1;
	 msq = lsq;
       }   */
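  /* (Concretely, "floor(p)" above is the address with its low bits masked
     off - for 16-byte vectors, p & -16 - which is exactly what the BIT_AND
     statements generated below compute; msq and lsq are then the two
     aligned vectors straddling the unaligned access.)  */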
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }
  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	}
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
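  /* E.g. for a load-lanes group of four V4SI vectors, AGGR_TYPE is the
     array type int[16], so a single IFN_LOAD_LANES call below can read
     the whole interleaved group.  */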
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  byte_offset);
	}
      else if (dataref_offset)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					  TYPE_SIZE_UNIT (aggr_type));
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);
      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    data_ref
		      = build2 (MEM_REF, vectype, dataref_ptr,
				dataref_offset
				? dataref_offset
				: build_int_cst (reference_alias_ptr_type
						 (DR_REF (first_dr)), 0));
		    align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			align = TYPE_ALIGN_UNIT (elem_type);
			misalign = 0;
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			misalign = DR_MISALIGNMENT (first_dr);
		      }
		    if (dataref_offset == NULL_TREE)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;
		    tree vs_minus_1;

		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    ptr = copy_ssa_name (dataref_ptr);
		    new_stmt = gimple_build_assign
				 (ptr, BIT_AND_EXPR, dataref_ptr,
				  build_int_cst
				    (TREE_TYPE (dataref_ptr),
				     -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs_minus_1,
				       TYPE_SIZE_UNIT (elem_type));
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign
				 (NULL_TREE, BIT_AND_EXPR, ptr,
				  build_int_cst
				    (TREE_TYPE (ptr),
				     -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  new_temp = copy_ssa_name (dataref_ptr);
		  new_stmt = gimple_build_assign
			       (new_temp, BIT_AND_EXPR, dataref_ptr,
				build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}
	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gcc_assert (!grouped_load);
		  /* If we have versioned for aliasing or the loop doesn't
		     have any data dependencies that would preclude this,
		     then we are sure this is a loop invariant load and
		     thus we can insert it on the preheader edge.  */
		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt, loop))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "hoisting out of the vectorized "
					   "loop: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
			}
		      tree tem = copy_ssa_name (scalar_dest);
		      gsi_insert_on_edge_immediate
			(loop_preheader_edge (loop),
			 gimple_build_assign (tem,
					      unshare_expr
						(gimple_assign_rhs1 (stmt))));
		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
		    }
		  else
		    {
		      gimple_stmt_iterator gsi2 = *gsi;
		      gsi_next (&gsi2);
		      new_temp = vect_init_vector (stmt, scalar_dest,
						   vectype, &gsi2);
		    }
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo,
							 bb_vinfo));
		}

	      if (negative)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}
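	      /* (For a negative-step access the vector is loaded in memory
		 order and then reversed; for V4SI the reverse permutation
		 mask is simply { 3, 2, 1, 0 }.)  */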
	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

	      /* Bump the vector pointer to account for a gap.  */
	      if (slp && group_gap != 0)
		{
		  tree bump = size_binop (MULT_EXPR,
					  TYPE_SIZE_UNIT (elem_type),
					  size_int (group_gap));
		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
						 stmt, bump);
		}
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (!load_lanes_p)
		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      dr_chain.release ();
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
		     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				 &lhs_def_stmt, &def, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
				 &rhs_def_stmt, &def, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  /* Is this a vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
			    &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be of signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }
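  /* For example, the scalar statement  x = a < b ? c : d;  is transformed
     below into a vector comparison feeding a vector select:
       vx = VEC_COND_EXPR <va < vb, vc, vd>;  */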

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      ops.safe_push (TREE_OPERAND (cond_expr, 0));
	      ops.safe_push (TREE_OPERAND (cond_expr, 1));
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();

	      ops.release ();
	      vec_defs.release ();
	    }
	  else
	    {
	      gimple gtemp;
	      vec_cond_lhs
		= vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
						stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

	      vec_cond_rhs
		= vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
						stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause
		    = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
		  vect_is_simple_use (then_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause
		    = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_oprnds0.pop ());
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_oprnds1.pop ());
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_cond_rhs = vec_oprnds1[i];
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
				vec_cond_lhs, vec_cond_rhs);
	  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
				  vec_compare, vec_then_clause,
				  vec_else_clause);

	  new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */
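  /* (A typical irrelevant statement is the increment of an induction
     variable used only for array indexing: the address computations are
     regenerated by the vectorizer, so such statements need no vector
     counterpart.)  */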
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
				|| relevance == vect_used_in_outer_by_reduction
				|| relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }
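  /* Each vectorizable_* routine below both checks that the target can
     handle the statement and, since VEC_STMT is NULL during analysis,
     performs analysis only; the first routine that recognizes the
     statement records its kind in STMT_VINFO_TYPE for the transform
     phase (vect_transform_stmt).  */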
  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
	  || vectorizable_conversion (stmt, NULL, NULL, NULL)
	  || vectorizable_shift (stmt, NULL, NULL, NULL)
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }
  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create the vector that maps statements to their stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector that maps statements to their stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
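/* (E.g. on x86_64 with SSE2 the first successful query, say for "int",
   yields V4SI and fixes current_vector_size at 16 bytes; later queries
   then return vector types of that same size.)  */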
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
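/* (E.g. pairing scalar type "int" with a V4SF vector type yields V4SI:
   the same 16-byte vector size, but with int elements.)  */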
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
	  && *dt == vect_double_reduction_def
	  && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);
  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
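/* (Worked example: widening char -> int on a target with 16-byte vectors
   takes one intermediate step: V16QI is unpacked into two V8HI vectors,
   each of which is unpacked into two V4SI vectors, so *MULTI_STEP_CVT is 1
   and INTERM_TYPES holds the short vector type.)  */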
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
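  /* (E.g. for a NOP conversion int -> char this picks VEC_PACK_TRUNC_EXPR:
     two V4SI vectors pack into one V8HI vector and two V8HI vectors into
     one V16QI vector, so the multi-step loop below records one
     intermediate step through the short vector type.)  */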
  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}